class QerrorCommand(NNToolShellBase):
    """Shell command class providing ``qerror``."""

    # QERROR COMMAND
    parser_qerror = Cmd2ArgumentParser()
    # Help texts use implicit string concatenation rather than a backslash
    # continuation inside the literal, which leaks source whitespace (or an
    # invalid escape sequence) into the string.
    parser_qerror.add_argument(
        '-s', '--step',
        action='store_true',
        help='evaluate quantization per step. i.e. '
             'individually quantize each layer')
    parser_qerror.add_argument(
        '--compare_quantized',
        action='store_true',
        help='quantize and dequantize the float output '
             'to give it the same error as the quantized output of the layer')
    parser_qerror.add_argument(
        '-r', '--report_lowest',
        type=int,
        help='QSNR threshold below which to report filename')
    table_options(parser_qerror, default_width=140)
    input_options(parser_qerror)

    @with_argparser(parser_qerror)
    @no_history
    def do_qerror(self, args):
        """Show quantization error introduced by processing one or more input files."""
        self._check_graph()
        self._check_quantized()
        fmt = 'tab' if args.output is None else args.output['fmt']
        input_args = self._get_input_args(args)
        if args.step:
            stats_collector = StepErrorStatsCollector(
                quant_compare=args.compare_quantized)
        else:
            stats_collector = ErrorStatsCollector(
                quant_compare=args.compare_quantized)
        cnt = 0
        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            cnt += 1
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            stat = stats_collector.collect_stats(self.G, data)
            if args.report_lowest is not None:
                # default=None keeps an empty stats mapping from raising
                # ValueError inside min().
                lowest = min((elem['qsnr'] for elem in stat.values()),
                             default=None)
                if lowest is not None and lowest < args.report_lowest:
                    self.pfeedback(
                        "{} had QSNR below threshold".format(file_per_input))
        if not cnt:
            self.perror("no files to process")
            return
        reporter = ErrorReporter(do_totals=(fmt != "csv"),
                                 one_input=cnt <= 1,
                                 with_chan=args.step)
        tab = reporter.report(self.G, stats_collector.reduce_stats())
        output_table(tab, args)
class AstatsCommand(NNToolShellBase):
    """Shell command class providing ``astats``."""

    # ASTATS COMMAND
    parser_astats = Cmd2ArgumentParser()
    parser_astats.add_argument(
        '-q', '--qsnr',
        type=float,
        default=30.0,
        help='QSNR threshold')
    parser_astats.add_argument(
        '-d', '--detail',
        action="store_true",
        help='Show fusions detail')
    parser_astats.add_argument(
        '-s', '--step',
        type=int,
        nargs=(1, 2),
        help='display information by channel for step. You can indicate a fusion step with two values. The step_idx and the idx of the node in the fusion.')
    table_options(parser_astats, default_width=180)
    input_options(parser_astats)

    @with_argparser(parser_astats)
    @no_history
    def do_astats(self, args: argparse.Namespace):
        """Calculate activation statistics on one or more input files."""
        self._check_graph()
        import_kwargs = self._get_input_args(args)
        collector = ActivationStatsCollector()

        # One value selects a plain step; two values select a node inside a
        # fusion and are kept as a tuple.
        selected_step = args.step
        if selected_step is not None:
            selected_step = (selected_step[0] if len(selected_step) == 1
                             else tuple(selected_step))

        if len(args.input_files) == 0:
            self.perror("You must enter some files to process")
            return

        for input_group in glob_input_files(args.input_files,
                                            self.G.num_inputs):
            LOG.info("input file %s", input_group)
            tensors = [
                import_data(filename, **import_kwargs)
                for filename in input_group
            ]
            collector.collect_stats(self.G, tensors)

        if args.output is None:
            fmt = 'tab'
        else:
            fmt = args.output['fmt']
        show_fusions = args.detail or isinstance(selected_step, tuple)
        reporter = ActivationReporter(do_totals=(fmt != "csv"),
                                      threshold=args.qsnr,
                                      yield_fusions=show_fusions)
        output_table(reporter.report(self.G, collector.reduce_stats()), args)
class BcorrCommand(NNToolShellBase):
    """Shell command class providing ``bcorr``."""

    # BCORR COMMAND
    parser_bcorr = Cmd2ArgumentParser()
    input_options(parser_bcorr)

    @with_argparser(parser_bcorr)
    def do_bcorr(self, args):
        """Correct biases with average quantization error."""
        self._check_graph()
        self._check_quantized()
        stats_collector = StepErrorStatsCollector()
        input_args = self._get_input_args(args)
        cnt = 0
        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            cnt += 1
            data = [
                import_data(filename, **input_args)
                for filename in file_per_input
            ]
            stats_collector.collect_stats(self.G, data)
        # Without any processed input there is no error statistic to apply;
        # report it (as the qerror command does) instead of adjusting biases
        # from empty stats.
        if not cnt:
            self.perror("no files to process")
            return
        adjust_biases(self.G, stats_collector.reduce_stats())
class ValidationCommand(NNToolShellBase):
    """Shell command class providing ``validate``."""

    # VAL COMMAND
    parser_val = Cmd2ArgumentParser()
    parser_val.add_argument(
        '-q', '--quantize',
        action='store_true',
        help='quantize the graph (must have already set quantization)')
    parser_val.add_argument(
        '-s', '--silent',
        action='store_true',
        help='do not print progress for each input')
    parser_val.add_argument(
        '--dataset_dir',
        completer_method=Cmd.path_complete,
        help='path to the directory of samples for test')
    parser_val.add_argument(
        '--class_thr',
        default=0,
        type=float,
        help='Threshold to consider a valid classification')
    parser_val.add_argument(
        '--progress_every',
        default=100,
        type=int,
        help='print accuracy every n computed predictions')
    parser_val.add_argument(
        '--binary_classification',
        action='store_true',
        help='output is one dimensional')
    # Help texts below use implicit concatenation instead of backslash
    # continuations inside the string literal.
    parser_val.add_argument(
        '--prediction_step_idx',
        default=-1,
        type=int,
        help='graph step index for the predicted value, '
             'default is the last layer of the network')
    parser_val_group = parser_val.add_mutually_exclusive_group(required=False)
    parser_val_group.add_argument(
        '--label_json',
        default=None,
        completer_method=Cmd.path_complete,
        help='path to the .json object containing labels annotation '
             '{ "filename0" : label0, "filename1": label1, ... }')
    parser_val_group.add_argument(
        '--class_number',
        default=None,
        type=int,
        help='Number of a single class that all should match')
    parser_val_group.add_argument(
        '--vww_instances_file',
        default=None,
        completer_method=Cmd.path_complete,
        help='path to the .json object containing labels instances '
             'with the visualwakewords format: '
             'instances = {images, annotations, categories} '
             'instances["images"] = { file_name:.., image_id:.. } '
             'instances["annotations"] = { image_id:.., label:..}')
    input_options(parser_val)

    @with_argparser(parser_val)
    @no_history
    def do_validate(self, args: argparse.Namespace):
        """
        Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
        folder).

        Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of digits must
        be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits must be 3,
        "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
        'file1':label1, ...}) and given to the function with --label_json
        """
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        # Count with the same input grouping that the execution loop uses so
        # that the progress total matches the number of iterations.
        number_samples = sum(
            1 for _ in glob_input_files(args.input_files, self.G.num_inputs))

        # Choose where the ground truth comes from; the three options are
        # mutually exclusive on the parser, file names are the fallback.
        if args.vww_instances_file:
            validation = ValidateFromVWWInstances(
                args.vww_instances_file,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.label_json:
            validation = ValidateFromJSON(
                args.label_json,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.class_number is not None:
            validation = ValidateFromClass(
                args.class_number,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        else:
            validation = ValidateFromName(
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = \
                    validation.validate(file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                # progress_every <= 0 disables the periodic report rather
                # than raising ZeroDivisionError on the modulo.
                if (i > 0 and args.progress_every > 0
                        and i % args.progress_every == 0):
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            # Deliberate: an interrupted run still reports partial results.
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * ngood / cnt
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
class DumpCommand(NNToolShellBase):
    """Shell command class providing ``dump``."""

    # DUMP COMMAND
    parser_dump = Cmd2ArgumentParser()
    parser_dump.add_argument(
        '-s', '--step',
        type=int,
        help='step to dump output of',
        default=None)
    parser_dump.add_argument(
        '-w', '--number_width',
        type=int,
        help='width of numbers',
        default=8)
    parser_dump.add_argument(
        '-p', '--precision',
        type=int,
        help='number of decimal places',
        default=4)
    parser_dump.add_argument(
        '-c', '--channel',
        type=int,
        help='channel to dump',
        default=None)
    parser_dump.add_argument(
        '-d', '--dequantize',
        action='store_true',
        help='dequantize result')
    parser_dump.add_argument(
        '--quantize_and_dequantize',
        action='store_true',
        help='quantize and dequantize float results')
    parser_dump_group = parser_dump.add_mutually_exclusive_group(
        required=False)
    parser_dump_group.add_argument(
        '-q', '--quantize',
        action='store_true',
        help='quantize the graph (must have already set quantization)')
    parser_dump_group.add_argument(
        '-Q', '--quantize_step',
        type=int,
        help='quantize a step of the graph (must have already'
             ' set quantization)',
        default=None)
    parser_dump_group.add_argument(
        '-A', '--quantize_all_steps',
        action='store_true',
        help='quantize all steps of the graph feeding'
             ' unquantized float data into each step')
    parser_dump.add_argument(
        '-P', '--pickle',
        completer_method=Cmd.path_complete,
        help='pickle all the outputed tensors to this file')
    parser_dump.add_argument(
        '-S', '--save',
        help='save the tensor to the tensors list')
    parser_dump.add_argument(
        '-v', '--visualize_detection',
        action='store_true',
        help='visualize input images and detection predictions')
    parser_dump.add_argument(
        '--checksum',
        action='store_true',
        help='print checksums')
    input_options(parser_dump)

    @with_argparser(parser_dump)
    @no_history
    def do_dump(self, args: argparse.Namespace):
        """
        Dump the activations resulting from running an input file through the graph.

        You can use the current quantization settings and can also just quantify one specific step of the graph.
        """
        self._check_graph()
        # NOTE(review): --dequantize is a store_true flag so it is never
        # None; the fallback expression is kept for programmatic callers.
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        # Compare against None: step index 0 is a valid value for -Q and
        # must not be treated as "option not given".
        quantize_one_step = args.quantize_step is not None
        if args.quantize or quantize_one_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            # negative values count back from the end of the step list
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(
                    -num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(outputs)
            else:
                self.G.print_intermediates(outputs,
                                           limit=step,
                                           width=args.number_width,
                                           precision=args.precision,
                                           channel=args.channel,
                                           order=['c', 'h', 'w'],
                                           checksum=args.checksum)

            if args.visualize_detection:
                img_in = Image.open(file_per_input[0]).convert('RGBA')
                # -1 in the input arguments means "use the image's own size"
                height = img_in.size[1] if input_args[
                    'height'] == -1 else input_args['height']
                width = img_in.size[0] if input_args[
                    'width'] == -1 else input_args['width']
                img_in = img_in.resize((width, height))

                if self.G.has_ssd_postprocess:
                    bboxes, classes, scores, _ = [
                        outputs[graph_out.step_idx][0]
                        for graph_out in self.G.outputs()
                    ]
                    draw = ImageDraw.Draw(img_in, 'RGBA')

                    for box, score, class_id in zip(bboxes, scores, classes):
                        if args.quantize and not args.dequantize:
                            # Raw quantized outputs: apply the detector's
                            # output scales before converting to pixels.
                            ssd_node = [
                                node for node in self.G.nodes()
                                if isinstance(node, SSDDetectorParameters)
                            ][0]
                            ssd_qrec = self.G.quantization[NodeId(ssd_node)]
                            box_scale = ssd_qrec.out_qs[0].scale
                            x0, x1 = int(box[1] * width * box_scale), int(
                                box[3] * width * box_scale)
                            y0, y1 = int(box[0] * height * box_scale), int(
                                box[2] * height * box_scale)
                            score = score * ssd_qrec.out_qs[2].scale
                        else:
                            x0, x1 = int(box[1] * width), int(box[3] * width)
                            y0, y1 = int(box[0] * height), int(box[2] * height)
                        rect_points = ((x0, y0), (x1, y0), (x1, y1), (x0, y1),
                                       (x0, y0))
                        draw.line(rect_points, fill='red', width=2)
                        txt = '{}@{}%'.format(class_id, int(score * 100))
                        draw.text([x0, y0 - 10], txt, fill=(0, 255, 0))
                img_in.show()

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            # a single input set is stored directly, not as a 1-element list
            if len(args.input_files) == self.G.num_inputs:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                if len(args.input_files) != self.G.num_inputs:
                    self.perror(
                        "can only save dumps on one input to tensor store")
                    return
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
class AquantCommand(NNToolShellBase):
    """Shell command class providing ``aquant``."""

    # AQUANT COMMAND
    parser_aquant = Cmd2ArgumentParser()
    parser_aquant_group = parser_aquant.add_mutually_exclusive_group(
        required=False)
    parser_aquant_group.add_argument(
        '-q', '--qsnr',
        type=float,
        default=50.0,
        help='QSNR threshold in case of POW2 scheme')
    parser_aquant_group.add_argument(
        '-f', '--force_width',
        choices=STATS_BITS,
        type=int,
        default=16,
        help='force all layers to this bit-width in case of POW2 scheme, '
             'SQ8 will automatically force 8-bits')
    parser_aquant.add_argument(
        '-s', '--scheme',
        type=str,
        choices=QUANTIZATION_SCHEMES,
        default='SQ8',
        help='quantize with scaling factors (TFlite quantization-like) [default] or POW2')
    parser_aquant.add_argument(
        '-d', '--quant_dimension',
        choices=['tensor', 'channel'],
        default='channel')
    # NOTE(review): relun_threshold is parsed but not read by do_aquant below.
    parser_aquant.add_argument(
        '-r', '--relun_threshold',
        type=int,
        default=1,
        help='Threshold above floored max value to adjust relun\'s to.')
    parser_aquant.add_argument(
        '-n', '--no_narrow_weights',
        action='store_true',
        help='Don\'t quantize weights uniformly over negative/positive '
             'range. i.e. Avoid -128 vs 127')
    input_options(parser_aquant)

    @with_argparser(parser_aquant)
    def do_aquant(self, args: argparse.Namespace):
        """Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        input_args = self._get_input_args(args)
        processed_input = False
        stats_collector = ActivationStatsCollector()
        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            processed_input = True
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            stats_collector.collect_stats(self.G, data)
        if not processed_input:
            self.perror("No input files found")
            return
        astats = stats_collector.reduce_stats()
        if args.scheme == 'SQ8':
            quantizer = MultQuantizer(
                astats,
                8,
                quantized_dimension=args.quant_dimension,
                narrow_weights=not args.no_narrow_weights)
        else:
            # Other schemes also need filter statistics; a separate name is
            # used instead of rebinding the activation collector.
            filter_collector = FilterStatsCollector()
            fstats = filter_collector.collect_stats(self.G)
            quantizer = SymmetricQuantizer(astats,
                                           fstats,
                                           force_width=args.force_width,
                                           min_qsnr=args.qsnr)
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        if args.scheme == 'SQ8':
            # Post-process the SQ8 quantization records with the concat and
            # softmax matchers.
            concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
            concats_matcher.match(self.G, set_identity=False)
            softmax_qrec_matcher = PropagateSoftmaxSymQrec()
            softmax_qrec_matcher.match(self.G, set_identity=False)
        LOG.info("Quantization set. Use qshow command to see it.")
class AquantCommand(NNToolShellBase):
    """Shell command class providing ``aquant``."""

    # AQUANT COMMAND
    parser_aquant = Cmd2ArgumentParser()
    parser_aquant_group = parser_aquant.add_mutually_exclusive_group(
        required=False)
    parser_aquant_group.add_argument(
        '-f', '--force_width',
        choices=STATS_BITS,
        type=int,
        default=16,
        help='force all layers to this bit-width in case of POW2 scheme, '
             'SQ8 will automatically force 8-bits')
    parser_aquant.add_argument(
        '-s', '--scheme',
        type=str,
        choices=QUANTIZATION_SCHEMES,
        default='SQ8',
        help='quantize with scaling factors (TFlite quantization-like) [default] or POW2')
    parser_aquant.add_argument(
        '-d', '--quant_dimension',
        choices=['tensor', 'channel'],
        default='channel')
    parser_aquant.add_argument(
        '-n', '--no_narrow_weights',
        action='store_true',
        help='Don\'t quantize weights uniformly over negative/positive '
             'range. i.e. Avoid -128 vs 127')
    input_options(parser_aquant)

    @with_argparser(parser_aquant)
    @store_once_in_history
    def do_aquant(self, args: argparse.Namespace):
        """Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        ranges_collector = ActivationRangesCollector()
        # SQ8 always uses 8 bits; other schemes take the requested width
        bits = 8 if args.scheme == 'SQ8' else args.force_width

        # When replaying a state file reuse the recorded activation stats if
        # they are present, otherwise collect them from the input files.
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            import_kwargs = self._get_input_args(args)
            seen_input = False
            for input_group in glob_input_files(args.input_files,
                                                self.G.num_inputs):
                LOG.info("input file %s", input_group)
                seen_input = True
                tensors = [
                    import_data(filename, **import_kwargs)
                    for filename in input_group
                ]
                ranges_collector.collect_stats(self.G, tensors)
            if not seen_input:
                self.perror("No input files found")
                return
            astats = ranges_collector.stats
            self._record_stats(astats)

        quantizer = UnifiedQuantizer(
            args.scheme,
            astats,
            quantized_dimension=args.quant_dimension,
            narrow_weights=not args.no_narrow_weights,
            bits=bits)
        self.G.quantization = quantizer.quantize(self.G)

        # The SQ8 post-quantization matchers (concat equalization, RNN input,
        # softmax and sigmoid/swish propagation) were disabled here with the
        # note "These should now be unnecessary".
        LOG.info("Quantization set. Use qshow command to see it.")
class GenProjectCommand(NNToolShellBase):
    """Shell command class providing ``gen_project``."""

    # GEN PROJECT COMMAND
    parser_gen_proj = Cmd2ArgumentParser()
    parser_gen_proj.add_argument(
        'project_folder',
        completer_method=Cmd.path_complete,
        help='project folder to create or update')
    parser_gen_proj.add_argument(
        '-o', '--overwrite',
        action='store_true',
        help='overwrite existing files')
    parser_gen_proj.add_argument(
        '--test_results',
        action='store_true',
        help='generate fake inputs in the quantization range '
             'and generate a check of the results')
    parser_gen_proj.add_argument(
        '--atproject',
        action='store_true',
        help='generate a native autotiler project with the model already generated')
    parser_gen_proj.add_argument(
        '--dump_tensors',
        action='store_true',
        help='print generated tensors')
    parser_gen_proj.add_argument(
        '--input_file',
        nargs='+',
        completer_method=Cmd.path_complete,
        default=None,
        help='if test_results, use this file to run inference')
    parser_gen_proj.add_argument(
        '--input_tensors',
        type=str,
        help='produce input tensors from tensor store with supplied name')
    parser_gen_proj.add_argument(
        '--save_inputs',
        action='store_true',
        help='if test_results, save the inputs in files also')
    parser_gen_proj.add_argument(
        '--tolerance',
        type=float,
        default=0.0,
        help="if test_results active, use this tolerance to check the results")
    input_options(parser_gen_proj)

    @with_argparser(parser_gen_proj)
    @no_history
    def do_gen_project(self, args):
        """
        Generate a project for the current graph including Makefiles, template main file and nntool script.

        The script will be generated from the command history with open commands excluded and a final save_state
        command added.
        """
        self._check_graph()
        self._check_quantized()
        self._check_adjusted()

        # Optionally take the graph inputs from a named tensor store entry:
        # one tensor per input node, looked up by the node's step index.
        store_name = args.input_tensors
        if not store_name:
            input_tensors = None
        elif store_name in self.tensor_store:
            stored = self.tensor_store[store_name]
            input_tensors = [
                stored[input_node.step_idx][0]
                for input_node in self.G.nodes(node_classes=InputParameters)
            ]
        else:
            self.perror(
                f'input tensor {args.input_tensors} not found in store')
            return

        gen_project(
            self.G,
            self.settings,
            args.project_folder,
            # pass a copy of the command history for the current graph
            self._cmd_history[self._graph_idx].copy(),
            overwrite=args.overwrite,
            performance=True,
            quantized=self.settings['load_quantization'],
            test_results=args.test_results,
            save_inputs=args.save_inputs,
            dump_tensors=args.dump_tensors,
            input_file=args.input_file,
            input_tensors=input_tensors,
            input_args=self._get_input_args(args),
            gen_atproject=args.atproject,
            tolerance=args.tolerance)
        self.pfeedback(f'project generated in {args.project_folder}')
class ValidationCommand(NNToolShellBase):
    """Shell command class providing ``validate``."""

    # VAL COMMAND
    parser_val = Cmd2ArgumentParser()
    parser_val.add_argument(
        '-s', '--silent',
        action='store_true',
        help='do not print progress for each input')
    parser_val.add_argument(
        '--dataset_dir',
        completer_method=Cmd.path_complete,
        help='path to the directory of samples for test')
    parser_val.add_argument(
        '--progress_every',
        default=100,
        type=int,
        help='print accuracy every n computed predictions')
    validation_options(parser_val)
    input_options(parser_val)

    @with_argparser(parser_val)
    @no_history
    def do_validate(self, args: argparse.Namespace):
        """
        Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
        folder).

        Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of digits must
        be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits must be 3,
        "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
        'file1':label1, ...}) and given to the function with --label_json
        """
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        # Count with the same input grouping that the execution loop uses so
        # that the progress total matches the number of iterations.
        number_samples = sum(
            1 for _ in glob_input_files(args.input_files, self.G.num_inputs))
        validation = get_validator(args)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = \
                    validation.validate(file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                # progress_every <= 0 disables the periodic report rather
                # than raising ZeroDivisionError on the modulo.
                if (i > 0 and args.progress_every > 0
                        and i % args.progress_every == 0):
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            # Deliberate: an interrupted run still reports partial results.
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * ngood / cnt
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
class AquantCommand(NNToolShellBase):
    """Shell command class providing ``aquant``."""

    # AQUANT COMMAND
    parser_aquant = Cmd2ArgumentParser()
    parser_aquant.add_argument(
        '-f', '--force_width',
        choices=STATS_BITS,
        type=int,
        default=0,
        help='force all layers to this bit-width in case of POW2 scheme, '
             'SQ8 will automatically force 8-bits')
    parser_aquant.add_argument(
        '-s', '--scheme',
        type=str,
        choices=QUANTIZATION_SCHEMES,
        default='SQ8',
        help='quantize with scaling factors (TFlite quantization-like) [default] or POW2')
    add_options_to_parser(parser_aquant)
    input_options(parser_aquant)

    @with_argparser(parser_aquant)
    @store_once_in_history
    def do_aquant(self, args: argparse.Namespace):
        """Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        ranges_collector = ActivationRangesCollector()
        quantizer_opts = get_options_from_args(args)

        # When replaying a state file reuse the recorded activation stats if
        # they are present, otherwise collect them from the input files.
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            import_kwargs = self._get_input_args(args)
            seen_input = False
            for input_group in glob_input_files(args.input_files,
                                                self.G.num_inputs):
                LOG.info("input file %s", input_group)
                seen_input = True
                tensors = [
                    import_data(filename, **import_kwargs)
                    for filename in input_group
                ]
                ranges_collector.collect_stats(self.G, tensors)
            if not seen_input:
                self.perror("No input files found")
                return
            astats = ranges_collector.stats
            self._record_stats(astats)

        # a non-zero forced width overrides the bits option
        if args.force_width:
            quantizer_opts['bits'] = args.force_width

        quantizer = UnifiedQuantizer(args.scheme, astats, **quantizer_opts)

        # clear the existing quantization before computing the new one
        self.G.quantization = None
        self.G.quantization = quantizer.quantize(self.G)

        quantize_cleaner = RemoveUnnecessaryQuantizeOperators()
        quantize_cleaner.match(self.G)
        self.G.add_dimensions()
        LOG.info("Quantization set. Use qshow command to see it.")
class CompressCommand(NNToolShellBase):
    """Shell command class providing ``compress`` and its report table."""

    # COMPRESS COMMAND
    parser_compress = NNToolArguementParser()
    parser_compress.add_argument(
        'step',
        nargs='?',
        help='constant input to compress. ' + NODE_SELECTOR_HELP,
        completer_method=NNToolShellBase.node_step_or_name_completer(
            allow_comma=True))
    parser_compress.add_argument(
        '--no_sparse',
        action="store_true",
        help='Do not check for sparsity')
    parser_compress.add_argument(
        '--force_sparse',
        action="store_true",
        help='Force these layers to use sparse bit (adds an extra bin)')
    parser_compress.add_argument(
        '--threshold',
        type=float,
        help='set values val>x>-val to 0 before clustering')

    parser_compress_sub = parser_compress.add_subparsers(
        title='compress subcommands',
        help='compression strategy for the selected layers')

    parser_compress_bits = parser_compress_sub.add_parser(
        'bits',
        help='compress using a lookup index of a fixed number of bits')
    parser_compress_bits.add_argument(
        'num_bits',
        choices=[str(v) for v in range(2, 8)],
        help='number of bits to use for lookup table indexes')

    parser_compress_min_qnsr = parser_compress_sub.add_parser(
        'min_qsnr',
        help='compress keeping the value QSNR above a minimum value')
    parser_compress_min_qnsr.add_argument(
        'qsnr',
        type=int,
        help='QSNR to keep above')

    parser_compress_auto = parser_compress_sub.add_parser(
        'auto',
        help='compress to a number of bits automatically using validation results')
    parser_compress_auto.add_argument(
        '--finetune',
        action="store_true",
        help='Experimental layer finetuning')
    validation_options(parser_compress_auto)
    input_options(parser_compress_auto)

    parser_compress_clear = parser_compress_sub.add_parser(
        'clear', help='clear compression on these nodes')
    parser_compress_off = parser_compress_sub.add_parser(
        'off', help='disable compression on these nodes')
    parser_compress_on = parser_compress_sub.add_parser(
        'on', help='enable compression on these nodes')

    parser_compress_save = parser_compress_sub.add_parser(
        'save', help='save compression settings to a file in json format')
    parser_compress_save.add_argument(
        'file',
        completer_method=Cmd.path_complete,
        help='file to save to')
    parser_compress_load = parser_compress_sub.add_parser(
        'load', help='load compression settings from a file in json format')
    parser_compress_load.add_argument(
        'file',
        completer_method=Cmd.path_complete,
        help='file to load from')

    # Each subparser records which operation was selected. The attribute is
    # only present on the parsed namespace when a subcommand was given.
    parser_compress_bits.set_defaults(operation='bits')
    parser_compress_min_qnsr.set_defaults(operation='min_qsnr')
    parser_compress_auto.set_defaults(operation='auto')
    parser_compress_clear.set_defaults(operation='clear')
    parser_compress_off.set_defaults(operation='off')
    parser_compress_on.set_defaults(operation='on')
    parser_compress_save.set_defaults(operation='save')
    parser_compress_load.set_defaults(operation='load')

    @with_argparser(parser_compress)
    def do_compress(self, args):
        """
        Compress graph constants for GAP9 compression engine. Compress with no arguments will list current
        compression settings. The compressed size in the results table includes the size of the codebook.

        In bits mode the amount of bits used for table indexes is specified directly. It should be from 2 to 7.

        In min_qsnr mode the tensors will be compressed with the number of bits necessary to stay above the given
        QSNR value. A value of around 30 is a good starting point.

        The auto mode uses the validation engine to explore possible tensor compression parameters. The command
        options are the same as the validate command. The first part of the process tries to find the lowest QSNR
        that can be selected for compression of all viable parameters with no bad validation results.

        The threshold argument can be used to clip values to zero before compression.
        """
        self._check_graph()
        # 'operation' only exists when a subcommand was parsed, so read it
        # defensively instead of risking an AttributeError further down.
        operation = getattr(args, 'operation', None)
        if not args.step:
            if operation is None:
                # bare "compress": list the current compression settings
                self.compress_make_table(
                    self.G.nodes(node_classes=ConstantInputParameters))
                return
            # materialize so the nodes can be traversed more than once
            nodes = list(self.G.nodes(node_classes=ConstantInputParameters))
        else:
            nodes, _ = self.get_node_step_or_name(
                args.step,
                classes=ConstantInputParameters,
                allow_comma=True)
            if not nodes:
                return

        if not operation:
            # No subcommand was provided, so call help
            self.do_help('compress')
            return

        if operation == "clear":
            for node in nodes:
                node.use_compressed = False
                node.compressed_value = None
            self.pfeedback(f'clear compression on {node.name}')
            return

        if operation == "on":
            for node in nodes:
                if node.compressed_value:
                    node.use_compressed = True
                    self.pfeedback(f'enable compression on {node.name}')
                else:
                    self.perror(
                        f"can't enable compression on {node.name} - not set")
            return

        if operation == "off":
            for node in nodes:
                node.use_compressed = False
                self.pfeedback(f'disable compression on {node.name}')
            return

        if operation == "save":
            save_map = {}
            for node in nodes:
                if node.compressed_value:
                    comp_val = node.compressed_value
                    save_map[node.name] = {
                        'bits': comp_val.bits,
                        'threshold': comp_val.threshold,
                        'sparse': bool(comp_val.sparse),
                    }
            with open(args.file, 'w') as fp:
                json.dump(save_map, fp)
            return

        if operation == "load":
            with open(args.file, 'r') as fp:
                save_map = json.load(fp)
            for node in nodes:
                if node.name not in save_map:
                    continue
                self.pfeedback(f"Loading parameters for {node.name}")
                try:
                    params = save_map[node.name]
                    node.compress_value(
                        bits=params['bits'],
                        threshold=params['threshold'],
                        allow_sparse=params['sparse'],
                        force_sparse=params['sparse'],
                    )
                    node.use_compressed = True
                except CompressionError as ex:
                    self.pfeedback(f'unable to compress {node.name} - {ex}')
            # NOTE(review): load returns without printing the report table
            # (unchanged behaviour) - confirm this is intended.
            return

        report_nodes = []
        if operation == "auto":
            if args.quantize:
                self._check_quantized()
            input_args = self._get_input_args(args)
            autocompress = AutoCompress(
                self.G,
                args.input_files,
                get_validator(args),
                input_args,
                prediction_step_idx=args.prediction_step_idx)

            def progress(msg, newline):
                print(msg, end='\n' if newline else '', flush=True)

            try:
                autocompress.tune_all(nodes, progress, quantize=args.quantize)
                if args.finetune:
                    # show the intermediate result before finetuning starts
                    self.compress_make_table(
                        [node for node in nodes if node.use_compressed])
                    autocompress.finetune(nodes,
                                          progress,
                                          quantize=args.quantize)
            except (KeyboardInterrupt, SystemExit):
                # Deliberate: report whatever was compressed so far.
                pass
            report_nodes = [node for node in nodes if node.use_compressed]
        else:
            for node in nodes:
                self.pfeedback(f"Evaluating {node.name}")
                nid = NodeId(node)
                if self.G.quantization and nid in self.G.quantization:
                    qtype = self.G.quantization[nid].out_qs[0]
                else:
                    qtype = None
                try:
                    kwargs = {
                        'qtype': qtype,
                        'threshold': args.threshold,
                        'allow_sparse': not args.no_sparse,
                        'force_sparse': args.force_sparse
                    }
                    if operation == "bits":
                        kwargs['bits'] = int(args.num_bits)
                    elif operation == "min_qsnr":
                        kwargs['min_qsnr'] = args.qsnr
                    else:
                        raise ValueError('strange operation')
                    node.compress_value(**kwargs)
                    comp_val = node.compressed_value
                    node.use_compressed = True
                except CompressionError as ex:
                    self.pfeedback(f'unable to compress {node.name} - {ex}')
                    comp_val = None
                if comp_val:
                    report_nodes.append(node)
        self.compress_make_table(report_nodes)

    def compress_make_table(self, nodes):
        """Print a table of compression results for the nodes that have a compressed value.

        Nodes without a compressed value are skipped. If nothing was
        compressed a short message is printed instead of the table.
        """
        comp_size = 0
        orig_size = 0
        comp_report = [[
            "Step", "Name", "Orig Size", "Compressed Size", "%age orig",
            "Bits", "Sparse", "Enabled"
        ]]
        for node in nodes:
            if not node.compressed_value:
                continue
            nid = NodeId(node)
            if self.G.quantization and nid in self.G.quantization:
                qbits = self.G.quantization[nid].out_qs[0].bits
            else:
                # no quantization record: size the original at 8 bits/value
                qbits = 8
            old_size = int(math.floor((node.value.size * qbits) / 8))
            comp_val = node.compressed_value
            # Total the same quantity the "Orig Size" column shows so the
            # Total row is the sum of the rows above it.
            orig_size += old_size
            comp_size += comp_val.size
            comp_report.append([
                node.step_idx, node.name, old_size, comp_val.size,
                (100 * comp_val.size) // old_size if old_size else 0,
                comp_val.bits,
                "Yes" if comp_val.sparse else "No",
                "Yes" if node.use_compressed else "No"
            ])
        if comp_size == 0:
            self.pfeedback("no constants compressed")
            return

        comp_report.append([
            "", "Total", orig_size, comp_size,
            (100 * comp_size) // orig_size if orig_size else 0,
            "", "", ""
        ])
        table = texttable.Texttable()
        table.set_cols_align(['l', 'l', 'l', 'l', 'l', 'l', 'l', 'l'])
        table.set_max_width(120)
        table.add_rows(comp_report)
        self.pfeedback("Compression report\n")
        self.pfeedback(table.draw() + '\n')
class DumpCommand(NNToolShellBase):
    """Shell mixin providing the ``dump`` command.

    ``dump`` executes the graph on each input file and either prints the
    resulting activations or collects them so that they can be pickled to a
    file, saved in the tensor store or returned to a calling Python script.
    """
    # DUMP COMMAND
    parser_dump = Cmd2ArgumentParser()
    parser_dump.add_argument('-s', '--step',
                             type=int,
                             help='step to dump output of',
                             default=None)
    parser_dump.add_argument('-w', '--number_width',
                             type=int,
                             help='width of numbers',
                             default=8)
    parser_dump.add_argument('-p', '--precision',
                             type=int,
                             help='number of decimal places',
                             default=4)
    parser_dump.add_argument('-c', '--channel',
                             type=int,
                             help='channel to dump',
                             default=None)
    parser_dump.add_argument('-d', '--dequantize',
                             action='store_true',
                             help='dequantize result')
    parser_dump.add_argument('--quantize_and_dequantize',
                             action='store_true',
                             help='quantize and dequantize float results')
    # -q, -Q and -A select different quantization modes and cannot be combined
    parser_dump_group = parser_dump.add_mutually_exclusive_group(required=False)
    parser_dump_group.add_argument('-q', '--quantize',
                                   action='store_true',
                                   help='quantize the graph (must have already set quantization)')
    parser_dump_group.add_argument('-Q', '--quantize_step',
                                   type=int,
                                   help='quantize a step of the graph (must have already' +
                                   ' set quantization)',
                                   default=None)
    parser_dump_group.add_argument('-A', '--quantize_all_steps',
                                   action='store_true',
                                   help='quantize all steps of the graph feeding' +
                                   ' unquantized float data into each step')
    parser_dump.add_argument('-P', '--pickle',
                             completer_method=Cmd.path_complete,
                             help='pickle all the outputed tensors to this file')
    parser_dump.add_argument('-S', '--save',
                             help='save the tensor to the tensors list')
    input_options(parser_dump)

    # The docstring below is deliberately short and unindented
    # (NOTE(review): it is presumably shown as the command help text).
    @with_argparser(parser_dump)
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantize one
specific step of the graph."""
        self._check_graph()

        # NOTE(review): '--dequantize' is a store_true flag so once parsed it
        # is always a bool and the fallback on pickle/save is never taken.
        # Left unchanged to preserve the current behaviour.
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)

        # Step 0 is a valid step but is falsy, so the step option has to be
        # compared with None; otherwise '-Q 0' would be silently ignored.
        quantize_one_step = args.quantize_step is not None
        if args.quantize or quantize_one_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                # NOTE(review): this value is not read again below since
                # format_dump_file is passed args.dequantize - confirm intent.
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()

        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            # negative values count back from the end of the graph
            if step < 0:
                step = num_steps + step
            # NOTE(review): the upper bound accepts step == num_steps, which
            # matches the message below - confirm this is intended.
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(-num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)
        # Results are collected instead of printed when they are going to be
        # pickled, saved or handed back to a Python script.
        collect_results = args.pickle or self._in_py or args.save
        pickles = []

        for file_per_input in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [import_data(input_file, **input_args) for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if collect_results:
                pickles.append(format_dump_file(self.G, outputs, not qmode.is_none,
                                                args.dequantize, args.quantize_step))
            else:
                self.G.print_intermediates(outputs, limit=step, width=args.number_width,
                                           precision=args.precision, channel=args.channel,
                                           order=['c', 'h', 'w'])

        if not collect_results:
            return

        if not pickles:
            self.perror("no input files found")
            return
        # a single input argument yields a bare result rather than a list
        if len(args.input_files) == 1:
            pickles = pickles[0]
        if args.pickle:
            with open(args.pickle, 'wb') as pickle_fp:
                pickle.dump(pickles, pickle_fp)
        if args.save:
            self.tensor_store[args.save] = pickles
        if self._in_py:
            self.last_result = pickles