def equalize_model(model: torch.nn.Module, input_shapes: Union[Tuple,
                                                               List[Tuple]]):
    """
    High-level API to perform Cross-Layer Equalization (CLE) on the given model. The model is equalized in place.

    :param model: Model to equalize
    :param input_shapes: Shape of the input (can be a tuple or a list of tuples if multiple inputs)
    :return: None
    """

    device = get_device(model)
    model.cpu()

    # fold batchnorm layers
    folded_pairs = fold_all_batch_norms(model, input_shapes)
    bn_dict = {conv: bn for conv, bn in folded_pairs}

    # replace any ReLU6 layers with ReLU
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6,
                                              torch.nn.ReLU)

    # perform cross-layer scaling on applicable layer sets
    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shapes)

    # high-bias fold
    HighBiasFold.bias_fold(cls_set_info_list, bn_dict)
    model.to(device=device)
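
A minimal usage sketch (an assumption: `equalize_model` is importable from this module, and the torchvision network is purely a stand-in):

import torchvision.models as models

model = models.resnet18().eval()

# Equalization happens in place; the model ends up back on its original device.
equalize_model(model, input_shapes=(1, 3, 224, 224))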
Example #2
def fold_all_batch_norms(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]) -> \
        List[Tuple[torch.nn.Module, torch.nn.BatchNorm2d]]:
    """
    Fold all batch_norm layers in a model into corresponding conv layers

    :param model: Model
    :param input_shapes: Input shapes for the model (can be one or multiple inputs)
    :return: A list of pairs of layers [(Conv/Linear, BN layer that got folded)]
    """
    # Remember the model's device, then move it to the CPU for folding
    device = utils.get_device(model)
    model.cpu()

    bn_conv_linear_pairs = find_all_batch_norms_to_fold(model, input_shapes)

    fold_given_batch_norms(model, bn_conv_linear_pairs)

    # When returning the pairs, we want the second element of the pair to be the BN
    pairs_to_return = []
    for pair in bn_conv_linear_pairs:
        if isinstance(pair[0], torch.nn.BatchNorm2d):
            pairs_to_return.append((pair[1], pair[0]))
        else:
            pairs_to_return.append(pair)

    model.to(device=device)

    return pairs_to_return
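
A hedged usage sketch showing how the returned (Conv/Linear, BN) pairs can be inspected; the torchvision model is a stand-in:

import torchvision.models as models

model = models.resnet18().eval()
conv_bn_pairs = fold_all_batch_norms(model, (1, 3, 224, 224))
for conv, bn in conv_bn_pairs:
    # the second element of each pair is the BN layer that got folded
    print(type(conv).__name__, '<-', type(bn).__name__)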
Example #3
def create_connected_graph_with_input_shapes(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]) \
        -> ConnectedGraph:
    """
    Create connected graph, using random inputs generated from given input shapes.
    :param model: torch model to create a connected graph from
    :param input_shapes: input shapes to the torch model
    :return: ConnectedGraph representation of the model
    """
    random_inputs = create_rand_tensors_given_shapes(input_shapes)
    device = get_device(model)
    random_inputs = tuple([inp.to(device) for inp in random_inputs])
    return ConnectedGraph(model, random_inputs)
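
A short usage sketch, assuming this helper is importable; a list of shapes can be passed for multi-input models:

import torchvision.models as models

model = models.resnet18().eval()
graph = create_connected_graph_with_input_shapes(model, (1, 3, 224, 224))
# For a model with several inputs, pass a list of shapes instead:
# graph = create_connected_graph_with_input_shapes(multi_input_model, [(1, 3, 224, 224), (1, 10)])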
Example #4
    def compute_and_save_weight_encodings(self, path: str,
                                          filename_prefix: str,
                                          input_shape: Union[Tuple,
                                                             List[Tuple]]):
        """
        Save the quantized model weight encodings

        :param path: path where to store model pth and encodings
        :param filename_prefix: filename to store exported weight encodings in json format
        :param input_shape: shape of the input parameter to the model
        :return: None
        """

        device = utils.get_device(self._model)
        self._model.cpu()
        inputs = utils.create_rand_tensors_given_shapes(input_shape)

        # compute weight encodings
        weight_encoding_dict = {}
        weight_encoding_dict_with_onnx_names = {}
        quantized_layers = self.__get_qc_quantized_layers(self._model)
        pytorch_onnx_names_dict = su.SaveUtils.get_name_of_op_from_graph(
            self._model, *inputs)
        for layer_name, layer in quantized_layers:
            if isinstance(layer, QcQuantizeWrapper):
                layer_wt_encoding = layer.compute_weight_encodings()
                # skip the dictionary update when the layer has no weight encoding
                if layer_wt_encoding is not None:
                    value = (layer_wt_encoding.max, layer_wt_encoding.min,
                             layer_wt_encoding.delta, layer_wt_encoding.offset,
                             layer_wt_encoding.bw)
                    weight_encoding_dict[layer_name] = value
                    if layer_name in pytorch_onnx_names_dict:
                        weight_encoding_dict_with_onnx_names[
                            pytorch_onnx_names_dict[layer_name]] = value
        # export weight encodings to output json file
        su.SaveUtils.save_weight_encodings_to_files(
            path=path,
            filename_prefix=filename_prefix,
            weight_encoding_dict=weight_encoding_dict,
            weight_encoding_dict_with_onnx_names=weight_encoding_dict_with_onnx_names)

        self._model.to(device)
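
A hedged usage sketch; `sim` below is a hypothetical instance of the class that defines this method:

# `sim` is a hypothetical instance of the enclosing quantization-sim class.
sim.compute_and_save_weight_encodings(path='./output',
                                      filename_prefix='resnet18',
                                      input_shape=(1, 3, 224, 224))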
Example #5
def find_all_conv_bn_with_activation(model: torch.nn.Module,
                                     input_shape: Tuple) -> Dict:
    """
    Uses searcher to find preceding and next bn layers for a conv/linear layer
    :param model: PyTorch model
    :param input_shape: shape of input to the model
    :return: dictionary of conv/linear layers with associated bn op / activation info
    """

    activation_types = ['relu', 'hardtanh']

    # initialize all patterns to be matched and associated call back functions
    patterns_with_callbacks = []
    layer_select_handler = ConvBnPatternHandler()
    patterns_with_callbacks.append(
        PatternType(pattern=['batch_norm', 'convolution'],
                    action=layer_select_handler))

    patterns_with_callbacks.append(
        PatternType(pattern=['convolution'], action=layer_select_handler))

    patterns_with_callbacks.append(
        PatternType(pattern=['addmm'], action=layer_select_handler))

    for activation in activation_types:
        patterns_with_callbacks.append(
            PatternType(pattern=['batch_norm', activation, 'convolution'],
                        action=layer_select_handler))

    device = utils.get_device(model)
    connected_graph = ConnectedGraph(model,
                                     (torch.rand(input_shape).to(device), ))

    # create graph searcher instance with connected graph and patterns to search
    graph_searcher = GraphSearcher(connected_graph, patterns_with_callbacks)

    # get all conv/linear and bn info
    graph_searcher.find_all_patterns_in_graph_apply_actions()
    convs_bn_activation_dict = layer_select_handler.get_conv_linear_bn_info_dict()

    return convs_bn_activation_dict
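
A hedged usage sketch; the exact contents of each value in the returned dictionary follow whatever ConvBnPatternHandler collects:

import torchvision.models as models

model = models.resnet18().eval()
conv_bn_info = find_all_conv_bn_with_activation(model, (1, 3, 224, 224))
for conv, bn_info in conv_bn_info.items():
    # bn_info carries the associated BN and activation details for this conv/linear layer
    print(conv, bn_info)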
Example #6
def visualize_weight_ranges_single_layer(layer,
                                         layer_name,
                                         scatter_plot=False):
    """
    Given a layer, visualizes weight ranges with scatter plots and line plots
    :param layer: layer with weights
    :param layer_name: layer name
    :param scatter_plot: Include scatter plot in plots
    :return: None
    """
    device = get_device(layer)
    layer.cpu()
    layer_weights = pd.DataFrame(get_weights(layer))
    layer_weights_summary_statistics = layer_weights.describe().T

    line_plots = line_plot_summary_statistics_model(
        layer_name=layer_name,
        layer_weights_data_frame=layer_weights_summary_statistics,
        width=1000,
        height=700)

    if scatter_plot:
        scatter_plot_mean, scatter_plot_min = scatter_plot_summary_stats(
            layer_weights_summary_statistics,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Standard Deviation: " + layer_name,
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Minimum vs Maximum: " + layer_name)

        scatter_plots_layout = row(scatter_plot_mean, scatter_plot_min)

        layout = column(scatter_plots_layout, line_plots)
    else:
        layout = line_plots
    layout_with_title = add_title(layout, layer_name)

    # Move layer back to device
    layer.to(device=device)
    return layout_with_title
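
A hedged usage sketch; since the function returns a Bokeh layout, it can be rendered with bokeh.plotting.show:

import torchvision.models as models
from bokeh.plotting import show

model = models.resnet18().eval()
layout = visualize_weight_ranges_single_layer(model.conv1, 'conv1', scatter_plot=True)
show(layout)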
Example #7
    def scale_model(
            model: torch.nn.Module,
            input_shapes: Union[Tuple, List[Tuple]]) -> List[ClsSetInfo]:
        """
        Uses cross-layer scaling to scale all applicable layers in the given model

        :param model: Model to scale
        :param input_shapes: Input shape for the model (can be one or multiple inputs)
        :return: CLS information for each CLS set
        """

        device = get_device(model)
        model.cpu()

        # Find layer groups
        graph_search = GraphSearchUtils(model, input_shapes)
        layer_groups = graph_search.find_layer_groups_to_scale()

        # Find cls sets from the layer groups
        cls_sets = []
        for layer_group in layer_groups:
            cls_set = GraphSearchUtils.convert_layer_group_to_cls_sets(
                layer_group)
            cls_sets += cls_set

        # Scale the CLS sets
        scale_factors = CrossLayerScaling.scale_cls_sets(cls_sets)

        # Find if there were relu activations between layers of each cls set
        is_relu_activation_in_cls_sets = graph_search.is_relu_activation_present_in_cls_sets(
            cls_sets)

        # Convert to a list of cls-set-info elements
        cls_set_info_list = CrossLayerScaling.create_cls_set_info_list(
            cls_sets, scale_factors, is_relu_activation_in_cls_sets)

        model.to(device=device)
        return cls_set_info_list
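
A hedged usage sketch; in the full CLE flow above, batch norms are folded before scaling, so this direct call is only illustrative:

import torchvision.models as models

model = models.resnet18().eval()
fold_all_batch_norms(model, (1, 3, 224, 224))  # CLE folds BNs before scaling
cls_set_info_list = CrossLayerScaling.scale_model(model, (1, 3, 224, 224))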
Example #8
    def _forward_pass(model: torch.nn.Module, batch: Union[torch.Tensor, List,
                                                           Tuple]):
        """
        forward pass depending model allocation on CPU / GPU till StopForwardException
        :param model: model
        :param batch: batch
        """
        # keep the model in eval mode
        model.eval()

        # get the model's device placement information
        device = utils.get_device(model)
        # move the batch to the same device as the model
        batch = utils.change_tensor_device_placement(batch, device)

        if isinstance(batch, torch.Tensor):
            batch = [batch]

        try:
            with torch.no_grad():
                _ = model(*batch)
        except StopForwardException:
            pass
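
A hedged usage sketch; `data_loader` is a hypothetical DataLoader yielding (inputs, labels) batches, and `_forward_pass` is assumed reachable as written (it takes no self, like a static helper):

# Hypothetical: data_loader yields (inputs, labels); only the inputs are forwarded.
images, _ = next(iter(data_loader))
_forward_pass(model, images)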
Example #9
def correct_bias(model: torch.nn.Module,
                 quant_params: qsim.QuantParams,
                 num_quant_samples: int,
                 data_loader,
                 num_bias_correct_samples: int,
                 conv_bn_dict: Union[Dict[torch.nn.Module, ConvBnInfoType],
                                     None] = None,
                 perform_only_empirical_bias_corr: bool = True,
                 layers_to_ignore: List[torch.nn.Module] = None):
    """
    Corrects bias for each Conv layer of model (unless ignored). A combination of Analytical and Empirical Bias
    Correction is used i.e. all the layers which can be corrected using Analytical Bias Correction are corrected
    using Analytical Bias Correction and remaining layers are corrected using Empirical method.

    Returns an in-place corrected floating point model

    :param model: Model to be corrected
    :param quant_params: Named tuple for quantization simulation for bias correction
    :param num_quant_samples: number of samples of images to pass through quantization sim for bias correction.
    :param data_loader: data loader for the model
    :param num_bias_correct_samples: number of samples for Bias correction
    :param conv_bn_dict: Dict of conv and bn with information related to activation. If None, the function calc it
    :param perform_only_empirical_bias_corr: Default True. If true will perform only empirical Bias Corr for all layers
           irrespective of the fact that layer is eligible for Analytical Bias Corr.
    :param layers_to_ignore: list of layer names for which we need to skip bias correction.

    """

    if layers_to_ignore is None:
        layers_to_ignore = []

    # Find batch size and shape of input tensor
    batch_size, input_shape = utils.get_input_shape_batch_size(data_loader)

    # Round the requested sample counts up to whole batches
    n_batches_bias_correction = int(
        np.ceil(num_bias_correct_samples / batch_size))
    n_batches_quantization = int(np.ceil(num_quant_samples / batch_size))

    data_loader_n_samples_bias_corr = utils.IterFirstX(
        data_loader, n_batches_bias_correction)
    data_loader_n_samples_quant = utils.IterFirstX(data_loader,
                                                   n_batches_quantization)

    # TODO: Remove wrapper function
    # Create a wrapping function for data loader for quantization
    def pass_data_through_model(model,
                                early_stopping_iterations=None,
                                use_cuda=False):
        # pylint: disable=unused-argument
        # forward pass for given number of batches for model
        for (images_in_one_batch, _) in data_loader_n_samples_quant:
            forward_pass(model, images_in_one_batch)

    ordered_conv_linear_nodes = get_ordered_lists_of_conv_fc(
        model, input_shape)

    if conv_bn_dict is None:
        conv_bn_dict = find_all_conv_bn_with_activation(model, input_shape)

    # Create a copy of the model as reference model
    model_copy = copy.deepcopy(model)

    # Add bias for all the layers whose bias is None
    for name, module in ordered_conv_linear_nodes:
        if module.bias is None:
            if isinstance(module, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
                output_size = module.out_channels
            elif isinstance(module, torch.nn.Linear):
                output_size = module.out_features
            module.bias = torch.nn.Parameter(torch.zeros(output_size))
            module.bias.data = module.bias.data.to(device=module.weight.device)

    # Quantize full model
    dummy_tensors = utils.create_rand_tensors_given_shapes(input_shape)
    dummy_tensors = [
        tensor.to(utils.get_device(model)) for tensor in dummy_tensors
    ]
    q = qsim.QuantizationSimModel(model=model,
                                  quant_scheme=quant_params.quant_scheme,
                                  rounding_mode=quant_params.round_mode,
                                  default_output_bw=quant_params.act_bw,
                                  default_param_bw=quant_params.weight_bw,
                                  in_place=True,
                                  dummy_input=dummy_tensors,
                                  config_file=quant_params.config_file)

    # make sure the model got updated in place before we use it for bias-correction updates
    assert (q.model is model)

    # Skip output quantization for every wrapped layer
    for _, module in model.named_modules():
        if isinstance(module, QcQuantizeWrapper):
            module.output_quantizers[0].enabled = False

    q.compute_encodings(pass_data_through_model, None)

    # For the first conv layer, perform analytical bias correction if perform_only_empirical_bias_corr
    # is False and the layer is not marked to be ignored.
    if not perform_only_empirical_bias_corr:
        module_name, module = ordered_conv_linear_nodes[0]
        if module not in layers_to_ignore:
            logger.info('Correcting layer %s using Analytical Bias Correction',
                        module_name)
            quantize_layer = utils.get_layer_by_name(model, module_name)
            call_analytical_mo_correct_bias(quantize_layer, None, None)
            logger.info('Corrected bias for the layer')
            ordered_conv_linear_nodes.pop(0)

    for module_name, module in ordered_conv_linear_nodes:
        # Ignore all layers which are skipped by user
        if module in layers_to_ignore:
            continue
        else:
            # make sure module is in the model used by qsim.
            assert (module in list(q.model.modules()))
            # Analytical Bias Correction is only done for Conv layers
            reference_layer = utils.get_layer_by_name(model_copy, module_name)
            quantize_layer = utils.get_layer_by_name(model, module_name)

            if module in conv_bn_dict.keys():

                bn_layer_info = conv_bn_dict[module]

                if perform_only_empirical_bias_corr or bn_layer_info is None or bn_layer_info.input_bn is None:
                    logger.info(
                        'Correcting layer %s using Empirical Bias Correction',
                        module_name)
                    bias_correction = libpymo.BiasCorrection()

                    # Get output from quantized model and reference model

                    for images_in_one_batch, _ in data_loader_n_samples_bias_corr:
                        reference_output_batch = get_output_data(
                            reference_layer, model_copy, images_in_one_batch)
                        quantized_model_output_batch = get_output_data(
                            quantize_layer, model, images_in_one_batch)

                        if isinstance(reference_layer, torch.nn.Linear):
                            extended_shape = np.concatenate(
                                (reference_output_batch.shape, np.array([1,
                                                                         1])))
                            reference_output_batch = reference_output_batch.reshape(
                                extended_shape)
                            quantized_model_output_batch = quantized_model_output_batch.reshape(
                                extended_shape)

                        bias_correction.storePreActivationOutput(
                            reference_output_batch)
                        bias_correction.storeQuantizedPreActivationOutput(
                            quantized_model_output_batch)

                    call_empirical_mo_correct_bias(module, bias_correction)

                else:
                    logger.info(
                        'Correcting layer %s using Analytical Bias Correction',
                        module_name)
                    call_analytical_mo_correct_bias(
                        quantize_layer, bn_layer_info.input_bn,
                        bn_layer_info.in_activation_type)

                logger.info('Corrected bias for the layer')

    SaveUtils.remove_quantization_wrappers(model)

    logger.info('Completed bias correction')
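
A hedged usage sketch. The QuantParams keywords below mirror the attributes the function reads (quant_scheme, round_mode, act_bw, weight_bw, config_file); the exact constructor signature and `train_loader` are assumptions:

import torchvision.models as models

model = models.resnet18().eval()
# Assumed constructor; correct_bias only reads the attributes named in the keywords.
params = qsim.QuantParams(weight_bw=8, act_bw=8, round_mode='nearest',
                          quant_scheme='tf_enhanced', config_file=None)
correct_bias(model, params,
             num_quant_samples=512,
             data_loader=train_loader,  # hypothetical DataLoader of (images, labels)
             num_bias_correct_samples=512,
             perform_only_empirical_bias_corr=True)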
Example #10
def visualize_changes_after_optimization_single_layer(name,
                                                      old_model_module,
                                                      new_model_module,
                                                      scatter_plot=False):
    """
    Creates before and after plots for a given layer.
    :param name: name of module
    :param old_model_module: the module of the model before optimization
    :param new_model_module: the module of the model after optimization
    :param scatter_plot: Include scatter plot in plots
    :return: None
    """

    device_old_module = get_device(old_model_module)
    device_new_module = get_device(new_model_module)

    old_model_module.cpu()
    new_model_module.cpu()

    layout = bokeh_plots.PlotsLayout()
    layout.title = name
    layer_weights_summary_statistics_old = pd.DataFrame(
        get_weights(old_model_module)).describe().T
    layer_weights_summary_statistics_new = pd.DataFrame(
        get_weights(new_model_module)).describe().T

    summary_stats_line_plot = line_plot_changes_in_summary_stats(
        layer_weights_summary_statistics_old,
        layer_weights_summary_statistics_new,
        x_axis_label="Output Channel",
        y_axis_label="Summary statistics",
        title="Changes in Key Stats Per Output Channel")

    if scatter_plot:
        plot_mean_old_model, plot_min_old_model = scatter_plot_summary_stats(
            layer_weights_summary_statistics_old,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Std After Optimization",
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Min vs Max After Optimization")

        plot_mean_new_model, plot_min_new_model = scatter_plot_summary_stats(
            layer_weights_summary_statistics_new,
            x_axis_label_mean="Mean Weights Per Output Channel",
            y_axis_label_mean="Std Per Output Channel",
            title_mean="Mean vs Std Before Optimization",
            x_axis_label_min="Min Weights Per Output Channel",
            y_axis_label_min="Max Weights Per Output Channel",
            title_min="Min vs Max Before Optimization")
        layout.add_row(
            row(plot_mean_old_model, plot_mean_new_model, plot_min_old_model))

        layout.add_row(row(summary_stats_line_plot, plot_min_new_model))
    else:
        layout.add_row(summary_stats_line_plot)

    old_model_module.to(device=device_old_module)
    new_model_module.to(device=device_new_module)

    return layout.complete_layout()
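
A hedged usage sketch: snapshot a layer before an in-place optimization, run the optimization, and plot the change. The particular optimization call is illustrative:

import copy

import torchvision.models as models
from bokeh.plotting import show

model = models.resnet18().eval()
before = copy.deepcopy(model)
equalize_model(model, input_shapes=(1, 3, 224, 224))  # any in-place optimization works here
layout = visualize_changes_after_optimization_single_layer(
    'conv1', before.conv1, model.conv1, scatter_plot=True)
show(layout)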
Example #11
    def save_encodings_to_files(self, model, path, filename_prefix, input_shape):
        """
        Save quantization encodings for the given model in json format
        :param model: Model to save
        :param path: Directory path to save
        :param filename_prefix: Filename of the file to save
        :param input_shape: shape of the input parameter to the model
        :return: None
        """
        # pylint: disable=too-many-locals
        device = utils.get_device(model)
        model.cpu()

        encodings_path_onnx_names = os.path.join(path, filename_prefix + '_onnx_names' + '.encodings')
        encodings_path_python_names = os.path.join(path, filename_prefix + '_pytorch_names' + '.encodings')

        encoding_dict_with_pytorch_names = {}
        encoding_dict_with_onnx_names = {}

        inputs = utils.create_rand_tensors_given_shapes(input_shape)

        pytorch_onnx_names_dict = self.get_name_of_op_from_graph(model, *inputs)

        for layer_name, layer in model.named_modules():

            if isinstance(layer, QcQuantizeStandalone):
                value = (layer.output_quantizers[0].encoding.max,
                         layer.output_quantizers[0].encoding.min,
                         layer.output_quantizers[0].encoding.delta,
                         layer.output_quantizers[0].encoding.offset,
                         layer.output_quantizers[0].bitwidth,  # hack - standalone layers have no parameters
                         layer.output_quantizers[0].bitwidth)
                encoding_dict_with_onnx_names[layer_name] = value
                encoding_dict_with_pytorch_names[layer_name] = value

            elif isinstance(layer, QcQuantizeWrapper):

                # This is a hack to keep this working for now; new json definitions are needed.
                # In reality a layer may have more than one parameter, or even zero parameters,
                # which this code does not currently handle.
                if layer.param_quantizers:
                    param_bw = next(iter(layer.param_quantizers.values())).bitwidth
                else:
                    param_bw = layer.output_quantizers[0].bitwidth

                value = (layer.output_quantizers[0].encoding.max,
                         layer.output_quantizers[0].encoding.min,
                         layer.output_quantizers[0].encoding.delta,
                         layer.output_quantizers[0].encoding.offset,
                         param_bw,
                         layer.output_quantizers[0].encoding.bw)
                if layer_name in pytorch_onnx_names_dict:
                    encoding_dict_with_onnx_names[pytorch_onnx_names_dict[layer_name]] = value
                    encoding_dict_with_pytorch_names[layer_name] = value

        if not encoding_dict_with_onnx_names:
            raise RuntimeError('Could not find any QcQuantizeOps in the model for saving encodings!')

        save_json_yaml(encodings_path_onnx_names, encoding_dict_with_onnx_names)
        save_json_yaml(encodings_path_python_names, encoding_dict_with_pytorch_names)

        model.to(device)
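
A hedged usage sketch; `save_utils` is a hypothetical instance of the enclosing utility class (referenced elsewhere in this file as su.SaveUtils), and the model is assumed to already contain quantization wrappers:

# Hypothetical instance of the enclosing class; the model must already hold
# QcQuantize wrappers with computed encodings, or a RuntimeError is raised.
save_utils.save_encodings_to_files(quantized_model,
                                   path='./output',
                                   filename_prefix='resnet18',
                                   input_shape=(1, 3, 224, 224))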