Example #1
def pruning_loss_sens_magnitude_iter(
    model: Union[str, ModelProto],
    sparsity_levels: Union[
        List[float], Tuple[float, ...]
    ] = default_pruning_sparsities_loss(True),
) -> Generator[
    Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress], None, None
]:
    """
    Approximated kernel sparsity (pruning) loss analysis for a given model.
    Iteratively builds a PruningLossSensitivityAnalysis object and yields an updated
    version after each layer is run. The final result is the complete
    analysis object.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param sparsity_levels: the sparsity levels to calculate the loss for each param
    :return: the analysis results for the model with an additional layer at each
        iteration along with a float representing the iteration progress
    """
    model = check_load_model(model)
    prunable = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    num_layers = len(prunable)

    for index, node in enumerate(prunable):
        node_id = extract_node_id(node)

        yield analysis, KSSensitivityProgress(
            index, node_id, num_layers, float(index) / float(num_layers)
        )

        weight, bias = get_node_params(model, node)
        # sort the weight magnitudes ascending so an index into the array
        # corresponds to the fraction of weights pruned up to that magnitude
        values = numpy.sort(numpy.abs(weight.val.flatten()))
        prev_index = 0

        for sparsity in sparsity_levels:
            val_index = round(sparsity * values.size)

            if val_index >= len(values):
                val_index = len(values) - 1

            if sparsity <= 1e-9:
                # treat (near) zero sparsity as the baseline measurement
                baseline = True
                sparsity = 0.0
                sparse_avg = 0.0
            else:
                baseline = False

                # approximate the loss at this level by the average magnitude
                # of the weights newly pruned since the previous level
                if val_index > prev_index:
                    sparse_avg = values[prev_index:val_index].mean().item()
                    prev_index = val_index
                else:
                    sparse_avg = values[val_index].item()
                    prev_index = val_index + 1

            analysis.add_result(node_id, weight.name, index, sparsity,
                                sparse_avg, baseline)

    yield analysis, KSSensitivityProgress(num_layers, None, num_layers, 1.0)
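
The generator yields the same analysis object repeatedly, updated in place, so consuming it is just a loop. A minimal sketch of driving it, assuming the sparseml.onnx import path and a placeholder "model.onnx" file (neither appears in the example itself):

# hypothetical usage sketch; the import path and file name are assumptions
from sparseml.onnx.optim import pruning_loss_sens_magnitude_iter

analysis = None
for analysis, progress in pruning_loss_sens_magnitude_iter("model.onnx"):
    # each yield reports the layer about to be analyzed; the final yield
    # carries the completed PruningLossSensitivityAnalysis
    print(progress)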
Example #2
    def __init__(
        self, model: Union[ModelProto, str, None], nodes: List[NodeAnalyzer] = None
    ):
        if model is None and nodes is None:
            raise ValueError("model or nodes must not be None")

        if model is not None and nodes is not None:
            raise ValueError("model or nodes must be None, both cannot be passed")

        if model is not None:
            model = check_load_model(model)
            node_shapes = extract_node_shapes(model)
            self._nodes = [
                NodeAnalyzer(
                    model, node, node_shape=node_shapes.get(extract_node_id(node))
                )
                for node in model.graph.node
            ]
        else:
            self._nodes = nodes
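
The guard clauses make the two construction paths mutually exclusive: pass a model to build a NodeAnalyzer for every node in its graph, or pass pre-built analyzers directly. A hedged sketch, assuming the owning class is named ModelAnalyzer (the class name is not visible in the snippet):

# hypothetical sketch; "ModelAnalyzer" as the owning class name is an assumption
def build_analyzers(model_path):
    analyzer = ModelAnalyzer(model_path)  # one NodeAnalyzer per graph node
    # rewrap existing analyzers without re-reading the model (private access
    # is for illustration only)
    return ModelAnalyzer(None, nodes=analyzer._nodes)

# ModelAnalyzer(None)          -> ValueError: "model or nodes must not be None"
# ModelAnalyzer(model, nodes)  -> ValueError: both cannot be passed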
Example #3
    def run(self) -> Iterator[Dict[str, Any]]:
        """
        Perform the work for the job.
        Runs and saves the appropriate benchmark based on the configuration.

        :return: an iterator containing progress update information
        """
        _LOGGER.info(
            "running benchmark for project_id {} and model_id {} and "
            "benchmark_id {} with core_counts: {}, batch_sizes: {}, "
            "instruction_sets: {}, inference_models: {}".format(
                self.project_id,
                self.model_id,
                self.benchmark_id,
                self.core_counts,
                self.batch_sizes,
                self.instruction_sets,
                self.inference_models,
            )
        )

        project_model = self._get_project_model()
        project_model.validate_filesystem()
        benchmark = self._get_project_benchmark()
        benchmark.result = {"benchmarks": []}
        sys_info = get_ml_sys_info()
        cores_per_socket = sys_info.get("cores_per_socket", 1)
        num_sockets = sys_info.get("num_sockets", 1)
        max_cores = cores_per_socket * num_sockets

        optims = set()
        for inference_model in self.inference_models:
            inference_model_optimization = inference_model[
                "inference_model_optimization"]
            if inference_model_optimization:
                optims.add(inference_model_optimization)

        iterables = list(
            itertools.product(
                self.core_counts, self.batch_sizes, self.inference_models
            )
        )
        num_steps = len(iterables) + len(optims)
        step_index = 0

        pruned_models = {}

        for inference_model_optimization in optims:
            model_proto = check_load_model(project_model.file_path)

            for progress in self._get_pruned_model_proto(
                model_proto, inference_model_optimization
            ):
                yield JobProgressSchema().dump({
                    "iter_indefinite": False,
                    "iter_class": "benchmark",
                    "iter_val": (step_index + progress) / num_steps,
                    "num_steps": num_steps,
                    "step_index": step_index,
                    "step_class": "pruning_{}".format(inference_model_optimization),
                })
            pruned_models[inference_model_optimization] = model_proto

            step_index += 1

        for core_count, batch_size, inference_model in iterables:
            inference_engine = inference_model["inference_engine"]
            inference_model_optimization = inference_model[
                "inference_model_optimization"]

            model = project_model.file_path

            if inference_model_optimization:
                model = pruned_models[inference_model_optimization]

            if inference_engine == ORT_CPU_ENGINE and (
                core_count == max_cores or core_count < 1
            ):
                runner = ORTModelRunner(model, batch_size=batch_size)
            elif inference_engine == ORT_CPU_ENGINE and (
                core_count != max_cores and core_count > 0
            ):
                message = "Can only run onnxruntime with max core count of {}".format(
                    max_cores
                )
                _LOGGER.error(message)
                raise Exception(message)
            elif inference_engine == DEEPSPARSE_ENGINE:
                runner = DeepSparseModelRunner(model, batch_size, core_count)
            elif inference_engine == ORT_GPU_ENGINE:
                raise NotImplementedError()
            else:
                raise ValueError(
                    "Invalid inference engine {}".format(inference_engine))

            step_class = (
                "{}_optim_{}_batch_size_{}_core_count_{}".format(
                    inference_engine,
                    inference_model_optimization,
                    batch_size,
                    core_count,
                )
                if inference_model_optimization
                else "{}_batch_size_{}_core_count_{}".format(
                    inference_engine, batch_size, core_count
                )
            )

            _LOGGER.debug(step_class)

            for progress in self._run_benchmark(
                    benchmark,
                    model,
                    runner,
                    core_count,
                    batch_size,
                    inference_engine,
                    inference_model_optimization,
                    num_steps,
                    step_index,
            ):
                yield JobProgressSchema().dump({
                    "iter_indefinite": False,
                    "iter_class": "benchmark",
                    "iter_val": progress,
                    "num_steps": num_steps,
                    "step_index": step_index,
                    "step_class": step_class,
                })
            step_index += 1

        benchmark.save()
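
Because run() is a generator, the benchmarks only execute while the caller iterates, and benchmark.save() never fires if the iterator is abandoned early. A sketch of a driver, where constructing the job instance is assumed rather than shown above:

# hypothetical driver; "job" is an instance of the worker class shown above
def consume_benchmark_job(job):
    for progress_dict in job.run():
        # each yielded dict is a serialized JobProgressSchema payload
        print(progress_dict["step_class"], progress_dict["iter_val"])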
Example #4
def test_check_load_model(onnx_repo_models):  # noqa: F811
    model_path = onnx_repo_models.model_path
    loaded_model = load_model(model_path)
    assert loaded_model == check_load_model(model_path)
    assert loaded_model == check_load_model(loaded_model)
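
This test pins down the check_load_model contract: a file path is loaded into a ModelProto, and an already-loaded ModelProto passes through unchanged. A minimal sketch of a helper with that behavior, written against onnx's public API as an illustration rather than the library's actual implementation:

import onnx
from onnx import ModelProto


def check_load_model_sketch(model):
    # illustrative stand-in: load from a path, pass a ModelProto through
    if isinstance(model, str):
        return onnx.load(model)
    if isinstance(model, ModelProto):
        return model
    raise ValueError("unsupported model type: {}".format(type(model)))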
Example #5
def pruning_loss_sens_one_shot_iter(
    model: Union[str, ModelProto],
    data: DataLoader,
    batch_size: int,
    steps_per_measurement: int,
    sparsity_levels: List[float] = default_pruning_sparsities_loss(False),
    use_deepsparse_inference: bool = False,
) -> Generator[
    Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress], None, None
]:
    """
    Run a one-shot sensitivity analysis for kernel sparsity.
    It does not retrain.
    Moves layer by layer to calculate the sensitivity analysis for each and
    resets the previously run layers.
    Updates and yields the PruningLossSensitivityAnalysis at each layer.
    The loss is calculated by taking the kl_divergence of the
    pruned values from the baseline.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param data: the data to run through the model
    :param batch_size: the batch size the data is created for
    :param steps_per_measurement: number of steps (batches) to run through
        the model for each sparsity level on each node
    :param sparsity_levels: the sparsity levels to calculate the loss for each param
    :param use_deepsparse_inference: True to use the DeepSparse inference engine
        to run the analysis, False to use onnxruntime
    :return: the sensitivity results for every node that is prunable,
        yielding an update at each layer along with the iteration progress
    """
    model = check_load_model(model)
    prunable_nodes = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    num_updates = len(prunable_nodes) * len(sparsity_levels) + 1
    update_num = 0

    yield analysis, KSSensitivityProgress(update_num, None, num_updates, 0.0)

    runner = (
        ORTModelRunner(model)
        if not use_deepsparse_inference
        else DeepSparseModelRunner(model, batch_size)
    )
    _LOGGER.debug("created runner for one shot analysis {}".format(runner))
    base_outputs, _ = runner.run(
        data,
        desc="",
        show_progress=False,
        max_steps=steps_per_measurement,
    )
    _LOGGER.debug("recorded base outputs")
    del runner

    for index, node in enumerate(prunable_nodes):
        node_id = extract_node_id(node)
        weight, bias = get_node_params(model, node)
        _LOGGER.debug("running one shot for node {}".format(node_id))

        for sparsity in sparsity_levels:
            update_num += 1
            yield analysis, KSSensitivityProgress(
                update_num,
                {"node_id": node_id, "sparsity": sparsity},
                num_updates,
                float(update_num) / float(num_updates),
            )

            prune_model_one_shot(model, [node], sparsity)
            _LOGGER.debug(
                "created one shot pruned model for sparsity {}".format(sparsity)
            )
            runner = (
                ORTModelRunner(model)
                if not use_deepsparse_inference
                else DeepSparseModelRunner(model, batch_size)
            )
            _LOGGER.debug("created runner for one shot analysis {}".format(runner))
            pruned_outputs, _ = runner.run(
                data,
                desc="",
                show_progress=False,
                max_steps=steps_per_measurement,
            )
            del runner
            _LOGGER.debug("recorded outputs")

            for base, pruned in zip(base_outputs, pruned_outputs):
                batch_losses = []

                for key, base_array in base.items():
                    pruned_array = pruned[key]
                    loss = kl_divergence(
                        pruned_array,
                        base_array,
                        min(base_array.min(), pruned_array.min()),
                    )
                    batch_losses.append(loss)

                analysis.add_result(
                    node_id,
                    weight.name,
                    index,
                    sparsity,
                    sum(batch_losses),
                    baseline=sparsity < 1e-9,
                )
        # reset node to its baseline density
        update_model_param(model, weight.name, weight.val)

    yield analysis, KSSensitivityProgress(num_updates, None, num_updates, 1.0)
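
A hedged sketch of invoking the one-shot analysis; the import paths, the DataLoader construction, and the "model.onnx" file are all assumptions layered on top of the example:

# hypothetical usage; every name below is an assumed placeholder, not taken
# from the example above
from sparseml.onnx.optim import pruning_loss_sens_one_shot_iter
from sparseml.onnx.utils import DataLoader

# generating random model inputs via DataLoader.from_model_random is an
# assumption about the available helpers
data = DataLoader.from_model_random("model.onnx", batch_size=16)

analysis = None
for analysis, progress in pruning_loss_sens_one_shot_iter(
    "model.onnx", data, batch_size=16, steps_per_measurement=5
):
    print(progress)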