def equalize_model(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]):
    """
    High-level API to perform Cross-Layer Equalization (CLE) on the given model. The model is equalized in place.

    :param model: Model to equalize
    :param input_shapes: Shape of the input (can be a tuple or a list of tuples if multiple inputs)
    :return: None
    """
    device = get_device(model)
    model.cpu()

    # Fold batchnorm layers
    folded_pairs = fold_all_batch_norms(model, input_shapes)
    bn_dict = {}
    for conv_bn in folded_pairs:
        bn_dict[conv_bn[0]] = conv_bn[1]

    # Replace any ReLU6 layers with ReLU
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6, torch.nn.ReLU)

    # Perform cross-layer scaling on applicable layer sets
    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shapes)

    # High-bias fold
    HighBiasFold.bias_fold(cls_set_info_list, bn_dict)

    model.to(device=device)
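
# Usage sketch for equalize_model (an illustrative, hedged example: it assumes
# torchvision is installed; any model with Conv/BN layer sequences works):
def _example_equalize_model():
    from torchvision import models

    model = models.resnet18().eval()
    # CLE runs in place: batchnorm folding, ReLU6 replacement, cross-layer
    # scaling, and high-bias folding are all applied directly to `model`
    equalize_model(model, input_shapes=(1, 3, 224, 224))
    return model
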
def fold_all_batch_norms(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]) -> \
        List[Tuple[torch.nn.Module, torch.nn.BatchNorm2d]]:
    """
    Fold all batch_norm layers in a model into corresponding conv layers

    :param model: Model
    :param input_shapes: Input shapes for the model (can be one or multiple inputs)
    :return: A list of pairs of layers [(Conv/Linear, BN layer that got folded)]
    """
    # Find whether model is on GPU
    device = utils.get_device(model)

    # If model is not on CPU, convert it to CPU
    model.cpu()

    bn_conv_linear_pairs = find_all_batch_norms_to_fold(model, input_shapes)
    fold_given_batch_norms(model, bn_conv_linear_pairs)

    # When returning the pairs, we want the second element of the pair to be the BN
    pairs_to_return = []
    for pair in bn_conv_linear_pairs:
        if isinstance(pair[0], torch.nn.BatchNorm2d):
            pairs_to_return.append((pair[1], pair[0]))
        else:
            pairs_to_return.append(pair)

    model.to(device=device)

    return pairs_to_return
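
# Usage sketch for fold_all_batch_norms (hedged example; assumes torchvision):
def _example_fold_all_batch_norms():
    from torchvision import models

    model = models.resnet18().eval()
    # Folding happens in place; each returned pair is
    # (Conv/Linear layer, BN layer that was folded into it)
    conv_bn_pairs = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))
    for conv, bn in conv_bn_pairs:
        print(type(conv).__name__, '<-', type(bn).__name__)
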
def create_connected_graph_with_input_shapes(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]) \
        -> ConnectedGraph:
    """
    Create connected graph, using random inputs generated from given input shapes.

    :param model: torch model to create a connected graph from
    :param input_shapes: input shapes to the torch model
    :return: ConnectedGraph representation of the model
    """
    random_inputs = create_rand_tensors_given_shapes(input_shapes)
    device = get_device(model)
    random_inputs = tuple(inp.to(device) for inp in random_inputs)
    return ConnectedGraph(model, random_inputs)
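
# Usage sketch for create_connected_graph_with_input_shapes (hedged example;
# assumes torchvision; the graph is built by tracing random tensors of the
# given shapes through the model):
def _example_create_connected_graph():
    from torchvision import models

    model = models.resnet18().eval()
    graph = create_connected_graph_with_input_shapes(model, input_shapes=(1, 3, 224, 224))
    return graph
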
def compute_and_save_weight_encodings(self, path: str, filename_prefix: str,
                                      input_shape: Union[Tuple, List[Tuple]]):
    """
    Save the quantized model weight encodings

    :param path: path where to store model pth and encodings
    :param filename_prefix: filename to store exported weight encodings in json format
    :param input_shape: shape of the input parameter to the model
    :return: None
    """
    device = utils.get_device(self._model)
    self._model.cpu()
    inputs = utils.create_rand_tensors_given_shapes(input_shape)

    # Compute weight encodings
    weight_encoding_dict = {}
    weight_encoding_dict_with_onnx_names = {}
    quantized_layers = self.__get_qc_quantized_layers(self._model)
    pytorch_onnx_names_dict = su.SaveUtils.get_name_of_op_from_graph(self._model, *inputs)

    for layer_name, layer in quantized_layers:
        if isinstance(layer, QcQuantizeWrapper):
            layer_wt_encoding = layer.compute_weight_encodings()
            # Skip the dictionary update when the layer has no weight encoding
            if layer_wt_encoding is not None:
                value = (layer_wt_encoding.max,
                         layer_wt_encoding.min,
                         layer_wt_encoding.delta,
                         layer_wt_encoding.offset,
                         layer_wt_encoding.bw)
                weight_encoding_dict[layer_name] = value
                if layer_name in pytorch_onnx_names_dict:
                    weight_encoding_dict_with_onnx_names[pytorch_onnx_names_dict[layer_name]] = value

    # Export weight encodings to output json file
    su.SaveUtils.save_weight_encodings_to_files(
        path=path, filename_prefix=filename_prefix,
        weight_encoding_dict=weight_encoding_dict,
        weight_encoding_dict_with_onnx_names=weight_encoding_dict_with_onnx_names)

    self._model.to(device)
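
# Usage sketch for compute_and_save_weight_encodings (hedged: `quantizer` stands
# in for an instance of the enclosing quantizer class, whose construction is not
# shown here; the path and filename prefix are illustrative):
def _example_compute_and_save_weight_encodings(quantizer):
    quantizer.compute_and_save_weight_encodings(path='./output',
                                                filename_prefix='model_weights',
                                                input_shape=(1, 3, 224, 224))
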
def find_all_conv_bn_with_activation(model: torch.nn.Module, input_shape: Tuple) -> Dict:
    """
    Uses searcher to find preceding and next bn layers for a conv/linear layer

    :param model: PyTorch model
    :param input_shape: shape of input to the model
    :return: dictionary of conv/linear layers with associated bn op / activation info
    """
    activation_types = ['relu', 'hardtanh']

    # Initialize all patterns to be matched and associated callback functions
    patterns_with_callbacks = []
    layer_select_handler = ConvBnPatternHandler()
    patterns_with_callbacks.append(PatternType(pattern=['batch_norm', 'convolution'],
                                               action=layer_select_handler))
    patterns_with_callbacks.append(PatternType(pattern=['convolution'],
                                               action=layer_select_handler))
    patterns_with_callbacks.append(PatternType(pattern=['addmm'],
                                               action=layer_select_handler))

    for activation in activation_types:
        patterns_with_callbacks.append(PatternType(pattern=['batch_norm', activation, 'convolution'],
                                                   action=layer_select_handler))

    device = utils.get_device(model)
    connected_graph = ConnectedGraph(model, (torch.rand(input_shape).to(device),))

    # Create graph searcher instance with connected graph and patterns to search
    graph_searcher = GraphSearcher(connected_graph, patterns_with_callbacks)

    # Get all conv/linear and bn info
    graph_searcher.find_all_patterns_in_graph_apply_actions()
    convs_bn_activation_dict = layer_select_handler.get_conv_linear_bn_info_dict()

    return convs_bn_activation_dict
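
# Usage sketch for find_all_conv_bn_with_activation (hedged example; assumes
# torchvision; the keys of the returned dict are the conv/linear modules):
def _example_find_all_conv_bn_with_activation():
    from torchvision import models

    model = models.resnet18().eval()
    conv_bn_dict = find_all_conv_bn_with_activation(model, input_shape=(1, 3, 224, 224))
    print('Found info for', len(conv_bn_dict), 'conv/linear layers')
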
def visualize_weight_ranges_single_layer(layer, layer_name, scatter_plot=False):
    """
    Given a layer, visualizes weight ranges with scatter plots and line plots

    :param layer: layer with weights
    :param layer_name: layer name
    :param scatter_plot: Include scatter plot in plots
    :return: Bokeh layout containing the weight range plots
    """
    device = get_device(layer)
    layer.cpu()
    layer_weights = pd.DataFrame(get_weights(layer))
    layer_weights_summary_statistics = layer_weights.describe().T

    line_plots = line_plot_summary_statistics_model(
        layer_name=layer_name,
        layer_weights_data_frame=layer_weights_summary_statistics,
        width=1000, height=700)

    if scatter_plot:
        scatter_plot_mean, scatter_plot_min = scatter_plot_summary_stats(
            layer_weights_summary_statistics,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Standard Deviation: " + layer_name,
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Minimum vs Maximum: " + layer_name)

        scatter_plots_layout = row(scatter_plot_mean, scatter_plot_min)
        layout = column(scatter_plots_layout, line_plots)
    else:
        layout = line_plots

    layout_with_title = add_title(layout, layer_name)

    # Move layer back to device
    layer.to(device=device)

    return layout_with_title
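
# Usage sketch for visualize_weight_ranges_single_layer (hedged example; assumes
# torchvision and that bokeh.plotting.show is available to render the layout):
def _example_visualize_weight_ranges_single_layer():
    from bokeh.plotting import show
    from torchvision import models

    model = models.resnet18().eval()
    layout = visualize_weight_ranges_single_layer(model.conv1, 'conv1', scatter_plot=True)
    show(layout)
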
def scale_model(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]) -> List[ClsSetInfo]:
    """
    Uses cross-layer scaling to scale all applicable layers in the given model

    :param model: Model to scale
    :param input_shapes: Input shape for the model (can be one or multiple inputs)
    :return: CLS information for each CLS set
    """
    device = get_device(model)
    model.cpu()

    # Find layer groups
    graph_search = GraphSearchUtils(model, input_shapes)
    layer_groups = graph_search.find_layer_groups_to_scale()

    # Find cls sets from the layer groups
    cls_sets = []
    for layer_group in layer_groups:
        cls_set = GraphSearchUtils.convert_layer_group_to_cls_sets(layer_group)
        cls_sets += cls_set

    # Scale the CLS sets
    scale_factors = CrossLayerScaling.scale_cls_sets(cls_sets)

    # Find if there were relu activations between layers of each cls set
    is_relu_activation_in_cls_sets = graph_search.is_relu_activation_present_in_cls_sets(cls_sets)

    # Convert to a list of cls-set-info elements
    cls_set_info_list = CrossLayerScaling.create_cls_set_info_list(
        cls_sets, scale_factors, is_relu_activation_in_cls_sets)

    model.to(device=device)

    return cls_set_info_list
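
# Usage sketch for scale_model (hedged: assumes this function is exposed as a
# static method of CrossLayerScaling, as the calls elsewhere in this listing
# suggest, and that batchnorms have already been folded beforehand):
def _example_scale_model():
    from torchvision import models

    model = models.resnet18().eval()
    fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))
    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shapes=(1, 3, 224, 224))
    return cls_set_info_list
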
def _forward_pass(model: torch.nn.Module, batch: Union[torch.Tensor, List, Tuple]):
    """
    Runs a forward pass on whichever device (CPU/GPU) the model is placed on,
    stopping early if a StopForwardException is raised.

    :param model: model
    :param batch: batch
    """
    # Keep the model in eval mode
    model.eval()

    # Get the model's device placement information
    device = utils.get_device(model)

    # Place the batch on the appropriate device
    batch = utils.change_tensor_device_placement(batch, device)
    if isinstance(batch, torch.Tensor):
        batch = [batch]

    try:
        with torch.no_grad():
            _ = model(*batch)
    except StopForwardException:
        pass
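
# Usage sketch for _forward_pass (hedged: a StopForwardException is typically
# raised by a forward hook on an intermediate layer to cut the pass short; the
# hook below is illustrative and assumes the model has a `conv1` submodule, as
# torchvision resnets do):
def _example_forward_pass(model, batch):
    def _stop_hook(_module, _inputs, _outputs):
        raise StopForwardException

    handle = model.conv1.register_forward_hook(_stop_hook)
    try:
        _forward_pass(model, batch)  # returns once conv1 has run
    finally:
        handle.remove()
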
def correct_bias(model: torch.nn.Module, quant_params: qsim.QuantParams,
                 num_quant_samples: int, data_loader, num_bias_correct_samples: int,
                 conv_bn_dict: Union[Dict[torch.nn.Module, ConvBnInfoType], None] = None,
                 perform_only_empirical_bias_corr: bool = True,
                 layers_to_ignore: List[torch.nn.Module] = None):
    """
    Corrects bias for each Conv layer of the model (unless ignored). A combination of Analytical and Empirical
    Bias Correction is used: all layers that can be corrected using Analytical Bias Correction are corrected
    that way, and the remaining layers are corrected using the Empirical method.

    The passed-in floating point model is corrected in place.

    :param model: Model to be corrected
    :param quant_params: Named tuple for quantization simulation for bias correction
    :param num_quant_samples: number of samples of images to pass through quantization sim for bias correction
    :param data_loader: data loader for the model
    :param num_bias_correct_samples: number of samples for bias correction
    :param conv_bn_dict: Dict of conv and bn with information related to activation. If None, the function
            computes it
    :param perform_only_empirical_bias_corr: Default True. If True, performs only Empirical Bias Correction
            for all layers, even those eligible for Analytical Bias Correction.
    :param layers_to_ignore: list of layers for which bias correction should be skipped
    """
    if layers_to_ignore is None:
        layers_to_ignore = []

    # Find batch size and shape of input tensor
    batch_size, input_shape = utils.get_input_shape_batch_size(data_loader)

    # Round the number of samples up to a multiple of the batch size
    n_batches_bias_correction = int(np.ceil(num_bias_correct_samples / batch_size))
    n_batches_quantization = int(np.ceil(num_quant_samples / batch_size))

    data_loader_n_samples_bias_corr = utils.IterFirstX(data_loader, n_batches_bias_correction)
    data_loader_n_samples_quant = utils.IterFirstX(data_loader, n_batches_quantization)

    # TODO: Remove wrapper function
    # Create a wrapping function for data loader for quantization
    def pass_data_through_model(model, early_stopping_iterations=None, use_cuda=False):
        # pylint: disable=unused-argument
        # Forward pass for the given number of batches
        for (images_in_one_batch, _) in data_loader_n_samples_quant:
            forward_pass(model, images_in_one_batch)

    ordered_conv_linear_nodes = get_ordered_lists_of_conv_fc(model, input_shape)

    if conv_bn_dict is None:
        conv_bn_dict = find_all_conv_bn_with_activation(model, input_shape)

    # Create a copy of the model as reference model
    model_copy = copy.deepcopy(model)

    # Add bias for all the layers whose bias is None
    for name, module in ordered_conv_linear_nodes:
        if module.bias is None:
            if isinstance(module, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
                output_size = module.out_channels
            elif isinstance(module, torch.nn.Linear):
                output_size = module.out_features
            module.bias = torch.nn.Parameter(torch.zeros(output_size))
            module.bias.data = module.bias.data.to(device=module.weight.device)

    # Quantize full model
    dummy_tensors = utils.create_rand_tensors_given_shapes(input_shape)
    dummy_tensors = [tensor.to(utils.get_device(model)) for tensor in dummy_tensors]
    q = qsim.QuantizationSimModel(model=model,
                                  quant_scheme=quant_params.quant_scheme,
                                  rounding_mode=quant_params.round_mode,
                                  default_output_bw=quant_params.act_bw,
                                  default_param_bw=quant_params.weight_bw,
                                  in_place=True,
                                  dummy_input=dummy_tensors,
                                  config_file=quant_params.config_file)

    # Make sure the model got updated in place before we use it for bias correction
    assert q.model is model
    # Updates to skip_output_activation and layers_to_ignore
    for name, module in model.named_modules():
        # Skip output quantization for all layers
        if isinstance(module, QcQuantizeWrapper):
            module.output_quantizers[0].enabled = False

    q.compute_encodings(pass_data_through_model, None)

    # For the first conv layer, perform analytical bias correction if
    # perform_only_empirical_bias_corr is set to False and the layer is not
    # marked to be ignored during bias correction.
    if not perform_only_empirical_bias_corr:
        module_name, module = ordered_conv_linear_nodes[0]
        if module not in layers_to_ignore:
            logger.info('Correcting layer %s using Analytical Bias Correction', module_name)
            quantize_layer = utils.get_layer_by_name(model, module_name)
            call_analytical_mo_correct_bias(quantize_layer, None, None)
            logger.info('Corrected bias for the layer')
            ordered_conv_linear_nodes.pop(0)

    for module_name, module in ordered_conv_linear_nodes:
        # Ignore all layers which are skipped by the user
        if module in layers_to_ignore:
            continue

        # Make sure the module is in the model used by qsim
        assert module in list(q.model.modules())

        # Analytical Bias Correction is only done for Conv layers
        reference_layer = utils.get_layer_by_name(model_copy, module_name)
        quantize_layer = utils.get_layer_by_name(model, module_name)

        if module in conv_bn_dict.keys():
            bn_layer_info = conv_bn_dict[module]

            if perform_only_empirical_bias_corr or bn_layer_info is None or bn_layer_info.input_bn is None:
                logger.info('Correcting layer %s using Empirical Bias Correction', module_name)
                bias_correction = libpymo.BiasCorrection()

                # Get output from quantized model and reference model
                for images_in_one_batch, _ in data_loader_n_samples_bias_corr:
                    reference_output_batch = get_output_data(reference_layer, model_copy, images_in_one_batch)
                    quantized_model_output_batch = get_output_data(quantize_layer, model, images_in_one_batch)

                    if isinstance(reference_layer, torch.nn.Linear):
                        extended_shape = np.concatenate((reference_output_batch.shape, np.array([1, 1])))
                        reference_output_batch = reference_output_batch.reshape(extended_shape)
                        quantized_model_output_batch = quantized_model_output_batch.reshape(extended_shape)

                    bias_correction.storePreActivationOutput(reference_output_batch)
                    bias_correction.storeQuantizedPreActivationOutput(quantized_model_output_batch)

                call_empirical_mo_correct_bias(module, bias_correction)

            else:
                logger.info('Correcting layer %s using Analytical Bias Correction', module_name)
                call_analytical_mo_correct_bias(quantize_layer, bn_layer_info.input_bn,
                                                bn_layer_info.in_activation_type)

            logger.info('Corrected bias for the layer')

    SaveUtils.remove_quantization_wrappers(model)

    logger.info('Completed bias correction')
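
# Usage sketch for correct_bias (hedged example: assumes a torchvision model and
# a DataLoader yielding (images, labels) batches; the QuantParams fields mirror
# the attributes read above, but the exact constructor may differ by version):
def _example_correct_bias(data_loader):
    from torchvision import models

    model = models.resnet18().eval()
    quant_params = qsim.QuantParams(weight_bw=8, act_bw=8,
                                    round_mode='nearest',
                                    quant_scheme='tf_enhanced')
    # Corrects `model` in place using 512 samples for both quantization and
    # bias correction (illustrative sample counts)
    correct_bias(model, quant_params,
                 num_quant_samples=512,
                 data_loader=data_loader,
                 num_bias_correct_samples=512)
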
def visualize_changes_after_optimization_single_layer(name, old_model_module, new_model_module, scatter_plot=False):
    """
    Creates before and after plots for a given layer.

    :param name: name of module
    :param old_model_module: the module of the model before optimization
    :param new_model_module: the module of the model after optimization
    :param scatter_plot: Include scatter plot in plots
    :return: Bokeh layout containing the before/after plots
    """
    device_old_module = get_device(old_model_module)
    device_new_module = get_device(new_model_module)
    old_model_module.cpu()
    new_model_module.cpu()

    layout = bokeh_plots.PlotsLayout()
    layout.title = name
    layer_weights_summary_statistics_old = pd.DataFrame(get_weights(old_model_module)).describe().T
    layer_weights_summary_statistics_new = pd.DataFrame(get_weights(new_model_module)).describe().T

    summary_stats_line_plot = line_plot_changes_in_summary_stats(
        layer_weights_summary_statistics_old,
        layer_weights_summary_statistics_new,
        x_axis_label="Output Channel",
        y_axis_label="Summary statistics",
        title="Changes in Key Stats Per Output Channel")

    if scatter_plot:
        plot_mean_old_model, plot_min_old_model = scatter_plot_summary_stats(
            layer_weights_summary_statistics_old,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Std Before Optimization",
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Min vs Max Before Optimization")

        plot_mean_new_model, plot_min_new_model = scatter_plot_summary_stats(
            layer_weights_summary_statistics_new,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Std After Optimization",
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Min vs Max After Optimization")

        layout.add_row(row(plot_mean_old_model, plot_mean_new_model, plot_min_old_model))
        layout.add_row(row(summary_stats_line_plot, plot_min_new_model))
    else:
        layout.add_row(summary_stats_line_plot)

    old_model_module.to(device=device_old_module)
    new_model_module.to(device=device_new_module)

    return layout.complete_layout()
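
# Usage sketch for visualize_changes_after_optimization_single_layer (hedged:
# compares the same layer before and after an in-place optimization such as CLE;
# assumes torchvision and bokeh.plotting.show):
def _example_visualize_changes_single_layer():
    import copy
    from bokeh.plotting import show
    from torchvision import models

    model = models.resnet18().eval()
    model_before = copy.deepcopy(model)
    equalize_model(model, input_shapes=(1, 3, 224, 224))  # optimize in place
    layout = visualize_changes_after_optimization_single_layer(
        'conv1', model_before.conv1, model.conv1, scatter_plot=True)
    show(layout)
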
def save_encodings_to_files(self, model, path, filename_prefix, input_shape):
    """
    Save quantization encodings for the given model in json format

    :param model: Model to save
    :param path: Directory path to save
    :param filename_prefix: Filename of the file to save
    :param input_shape: shape of the input parameter to the model
    :return: None
    """
    # pylint: disable=too-many-locals
    device = utils.get_device(model)
    model.cpu()
    encodings_path_onnx_names = os.path.join(path, filename_prefix + '_onnx_names' + '.encodings')
    encodings_path_python_names = os.path.join(path, filename_prefix + '_pytorch_names' + '.encodings')

    encoding_dict_with_pytorch_names = {}
    encoding_dict_with_onnx_names = {}

    inputs = utils.create_rand_tensors_given_shapes(input_shape)
    pytorch_onnx_names_dict = self.get_name_of_op_from_graph(model, *inputs)

    for layer_name, layer in model.named_modules():
        if isinstance(layer, QcQuantizeStandalone):
            value = (layer.output_quantizers[0].encoding.max,
                     layer.output_quantizers[0].encoding.min,
                     layer.output_quantizers[0].encoding.delta,
                     layer.output_quantizers[0].encoding.offset,
                     layer.output_quantizers[0].bitwidth,
                     # Hack - standalone layers have no parameters
                     layer.output_quantizers[0].bitwidth)
            encoding_dict_with_onnx_names[layer_name] = value
            encoding_dict_with_pytorch_names[layer_name] = value

        elif isinstance(layer, QcQuantizeWrapper):
            # This is a hack to keep this working for now; new json definitions are needed.
            # In reality, layers may have more than one parameter, or even none;
            # this code does not handle that currently.
            if layer.param_quantizers:
                param_bw = next(iter(layer.param_quantizers.values())).bitwidth
            else:
                param_bw = layer.output_quantizers[0].bitwidth

            value = (layer.output_quantizers[0].encoding.max,
                     layer.output_quantizers[0].encoding.min,
                     layer.output_quantizers[0].encoding.delta,
                     layer.output_quantizers[0].encoding.offset,
                     param_bw,
                     layer.output_quantizers[0].encoding.bw)

            if layer_name in pytorch_onnx_names_dict:
                encoding_dict_with_onnx_names[pytorch_onnx_names_dict[layer_name]] = value
            encoding_dict_with_pytorch_names[layer_name] = value

    if not encoding_dict_with_onnx_names:
        raise RuntimeError('Could not find any QcQuantizeOps in the model for saving encodings!')

    save_json_yaml(encodings_path_onnx_names, encoding_dict_with_onnx_names)
    save_json_yaml(encodings_path_python_names, encoding_dict_with_pytorch_names)
    model.to(device)
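
# Usage sketch for save_encodings_to_files (hedged: `save_utils` stands in for an
# instance of the enclosing SaveUtils-style class, and `sim.model` for a model
# whose layers are already wrapped in QcQuantizeWrapper with computed encodings;
# both names are illustrative):
def _example_save_encodings_to_files(save_utils, sim):
    save_utils.save_encodings_to_files(sim.model,
                                       path='./output',
                                       filename_prefix='model',
                                       input_shape=(1, 3, 224, 224))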