Example #1
        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Have to use index since 'node' is not in part
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values
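
A minimal usage sketch for the partitioning path exercised by partition_and_infer() above. The model file name is hypothetical; the entry points (gs.import_onnx, Graph.toposort, Graph.fold_constants, Graph.cleanup, gs.export_onnx) are the public onnx-graphsurgeon API, and ONNX Runtime must be installed:

import onnx
import onnx_graphsurgeon as gs

# Hypothetical model file; any model that ONNX Runtime can evaluate will do.
graph = gs.import_onnx(onnx.load("model.onnx"))

# fold_constants() requires a topologically sorted graph.
graph.toposort()

# With partitioning="basic", an inference failure in one partition leaves the
# other partitions unaffected; "recursive" additionally peels nodes off a
# failing partition, as shown in the snippet above.
graph.fold_constants(partitioning="basic")

# Folding leaves the now-unused producer nodes behind; cleanup() removes them.
graph.cleanup()

onnx.save(gs.export_onnx(graph), "model_folded.onnx")
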
Example #2
    def fold_constants(self,
                       fold_shapes=True,
                       recurse_subgraphs=True,
                       partitioning=None,
                       error_ok=True):
        """
        Folds constants in-place in the graph. The graph must be topologically sorted prior to
        calling this function (see `toposort()`).

        This function will not remove constants after folding them. In order to get rid of
        these hanging nodes, you can run the `cleanup()` function.

        *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
        and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

        Args:
            fold_shapes (bool):
                    Whether to fold `Shape` nodes in the graph.
                    This requires shapes to be inferred in the graph, and can only fold
                    static shapes.
                    Defaults to True.
            recurse_subgraphs (bool):
                    Whether to recursively fold constants in subgraphs.
                    Defaults to True.
            partitioning (Union[str, None]):
                    Whether/How to partition the graph so that errors in folding one
                    part of a model do not affect other parts. Available modes are:

                    - None: Do not partition the graph. If inference fails, no constants are folded.
                    - "basic": Partition the graph. If inference fails in one partition, other partitions will
                            remain unaffected.
                    - "recursive": Parition the graph recursively. If inference fails in a partition, the partition
                            will be further paritioned.

                    Defaults to None.
            error_ok (bool):
                    Whether inference errors should be suppressed.
                    When this is False, any errors encountered during inference will be re-raised.
                    Defaults to True.

        Returns:
            self
        """
        import onnxruntime as rt
        from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

        PARTITIONING_MODES = [None, "basic", "recursive"]
        if partitioning not in PARTITIONING_MODES:
            G_LOGGER.critical(
                "Argument for parameter 'partitioning' must be one of: {:}".
                format(PARTITIONING_MODES))

        # First perform shape tensor cast elision on the graph prior to other constant folding
        # Search for Cast(s) (from int -> float) -> intermediate operator (with float constants) -> Cast(s) (back to int)
        # This pattern is problematic for TensorRT since these operations may be performed on Shape Tensors, which
        # are not allowed to be floating point type. Attempt to fold the pattern here
        VALID_CAST_ELISION_OPS = [
            "Add", "Sub", "Mul", "Div", "Max", "Min", "Equal", "Greater",
            "Less", "Concat"
        ]
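
        # Illustration of the pattern these ops participate in (shape-tensor math
        # routed through float):
        #
        #     int64 shape --Cast(to=1/FLOAT)--> Add(float Constant) --Cast(to=6 or 7)--> int shape
        #
        # After elision both Cast nodes become Identity, any float Constant inputs are
        # recast to the integer type, and the intermediate op works on integer shape
        # values end to end.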

        def run_cast_elision(node):
            import onnx

            if node.op not in VALID_CAST_ELISION_OPS:
                return

            # Gather input Cast nodes that cast to float32 ("to" == 1, i.e. TensorProto.FLOAT)
            inp_casts = [
                inp_node for inp_tensor in node.inputs
                for inp_node in inp_tensor.inputs
                if inp_node.op == "Cast" and inp_node.attrs["to"] == 1
            ]

            # No cast nodes found, return early
            if not inp_casts:
                return

            # Ensure that all input cast nodes are casting from the same type
            final_type = None
            for inp in inp_casts:
                curr_type = onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
                    inp.inputs[0].dtype]
                final_type = final_type or curr_type
                if final_type != curr_type:
                    return

            # Check validity and get list of output nodes
            out_casts = []

            for out_tensor in node.outputs:
                for out_node in out_tensor.outputs:
                    # "to" == 6 (INT32) or 7 (INT64): cast back to an integer type.
                    if out_node.op != "Cast" or out_node.attrs["to"] not in [6, 7]:
                        # Can exit early if any of the output nodes are not valid casts
                        return
                    out_casts.append(out_node)
                    # Check that all final cast types are the same.
                    curr_type = out_node.attrs["to"]
                    if final_type != curr_type:
                        return

            # If all checks passed - update constant values.
            for inp in node.inputs:
                if isinstance(inp, Constant):
                    inp.values = inp.values.astype(
                        onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type])

            # "Remove" casts nodes by changing I/O node operators to Identity. Update corresponding tensor dtypes as well
            def replace_with_identity(cast_node, change_dtype):
                cast_node.op = "Identity"
                cast_node.attrs = {}
                getattr(
                    cast_node, change_dtype
                )[0].dtype = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type]
                G_LOGGER.debug("Cast node {:} elided".format(cast_node.name))

            for inp in inp_casts:
                replace_with_identity(inp, change_dtype="outputs")

            for out in out_casts:
                replace_with_identity(out, change_dtype="inputs")

        # Perform shape tensor cast elision:
        if fold_shapes:
            G_LOGGER.debug(
                "Performing shape tensor cast elision in {:}".format(
                    self.name))
            try:
                for node in self.nodes:
                    run_cast_elision(node)
            except Exception as err:
                if not error_ok:
                    raise err
                G_LOGGER.warning("'{:}' routine failed with: {:}".format(
                    "Shape tensor cast elision", err))

        G_LOGGER.debug("Folding constants in {:}".format(self.name))

        graph_clone = self.copy()
        clone_tensors = graph_clone.tensors()

        # We find graph constants in two passes:
        # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
        # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
        #    and turns them into Constants iff the input has a statically known shape.
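        # For example, the output of an Add whose inputs are both Constant tensors is
        # marked foldable in pass 1 and later evaluated with ONNX Runtime, whereas the
        # output of a Shape node whose input has a fully static shape is converted
        # directly into a Constant in pass 2, without running inference.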

        def update_foldable_outputs(graph_constants):
            def is_foldable(node):
                def all_tensors_const(tensors):
                    return all([t.name in graph_constants for t in tensors])

                if not all_tensors_const(node.inputs):
                    return False

                all_subgraph_foreign_tensors_const = True
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        foreign_tensors = attr._foreign_tensors().values()
                        all_subgraph_foreign_tensors_const &= all_tensors_const(
                            foreign_tensors)
                return all_subgraph_foreign_tensors_const

            # Walks along the outputs of graph_constants to see if they can also be computed statically.
            # Since the graph is topologically sorted, this should find all constant nodes in the graph.
            for node in graph_clone.nodes:
                if is_foldable(node):
                    graph_constants.update(
                        {out.name: out
                         for out in node.outputs})
            return graph_constants

        # Pass 1: Non-shape Constant Folding

        graph_constants = {
            name: tensor
            for name, tensor in clone_tensors.items()
            if isinstance(tensor, Constant)
        }

        # Replaces outputs of Constant nodes with constant tensors
        for tensor in clone_tensors.values():
            if len(tensor.inputs) == 1:
                node = tensor.inputs[0]
                if node.op == "Constant":
                    graph_constants[tensor.name] = tensor.to_constant(
                        node.attrs["value"]._values
                    )  # Using ._values avoids copying
                    graph_constants[tensor.name].inputs.clear()

        graph_constants = update_foldable_outputs(graph_constants)

        # Pass 2: Shape Folding

        def get_producer(tensor, op):
            """
            Get the producer of the specified tensor iff it matches op
            """
            if len(tensor.inputs) != 1:
                return None

            node = tensor.inputs[0]
            if node.op != op:
                return None
            return node

        def get_input(node, index=0):
            """
            Get the input tensor of a node iff the input tensor is not already marked a graph constant.
            """
            if node is None:
                return None

            inp = node.inputs[index]

            # If the input was already found to be a constant, it will be folded anyway.
            if inp.name in graph_constants:
                return None

            return inp

        def get_scalar_value(tensor):
            """
            Gets the scalar value of a tensor with a single item
            """
            if not tensor.shape:
                return tensor.values
            else:
                return list(tensor.values)[0]

        def fold_shape(tensor):
            inp = get_input(get_producer(tensor, "Shape"))
            if inp is None:
                return None

            if inp.shape is None or misc.is_dynamic_shape(inp.shape):
                return None
            return np.array(inp.shape, dtype=np.int64)

        def fold_shape_gather(tensor):
            gather = get_producer(tensor, "Gather")
            if gather is None:
                return None

            data = gather.inputs[0]
            indices_tensor = gather.inputs[1]

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            if not isinstance(indices_tensor, Constant):
                return None

            indices = indices_tensor.values
            if not indices.shape:  # Scalar-case
                shape = inp.shape[int(indices)]
                if misc.is_dynamic_dimension(shape):
                    return None
            else:
                shape = [inp.shape[index] for index in indices]
                if misc.is_dynamic_shape(shape):
                    return None

            return np.array(shape, dtype=np.int64)

        def fold_shape_slice(tensor):
            slice = get_producer(tensor, "Slice")
            if slice is None:
                return None

            data = slice.inputs[0]

            if len(slice.inputs) >= 3:
                starts, ends = slice.inputs[1:3]
                if any(not isinstance(t, Constant) for t in [starts, ends]):
                    return None
                starts, ends = get_scalar_value(starts), get_scalar_value(ends)
            elif "starts" in slice.attrs and "ends" in slice.attrs:
                starts, ends = slice.attrs["starts"][0], slice.attrs["ends"][0]
            else:
                return None

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            # For shape tensors, we can only slice on the 0th dimension.
            if len(slice.inputs) > 3:
                axes = slice.inputs[3]
                if not isinstance(axes, Constant):
                    return None

                if get_scalar_value(axes) != 0:
                    return None
            elif "axes" in slice.attrs:
                if slice.attrs["axes"][0] != 0:
                    return None

            steps = 1
            if len(slice.inputs) > 4:
                steps = slice.inputs[4]
                if not isinstance(steps, Constant):
                    return None
                steps = get_scalar_value(steps)
            elif "steps" in slice.attrs:
                steps = slice.attrs["steps"][0]

            shape = inp.shape[starts:ends:steps]
            if misc.is_dynamic_shape(shape):
                return None

            return np.array(shape, dtype=np.int64)

        if fold_shapes:
            # NOTE: The order of shape folding passes is important to maximize how much we fold (phase-ordering problem).
            SHAPE_FOLD_FUNCS = [
                fold_shape_gather, fold_shape_slice, fold_shape
            ]
            for shape_fold_func in SHAPE_FOLD_FUNCS:
                try:
                    for tensor in clone_tensors.values():
                        shape_of = shape_fold_func(tensor)

                        if shape_of is not None:
                            G_LOGGER.ultra_verbose(
                                "Folding shape tensor: {:} to: {:}".format(
                                    tensor.name, shape_of))
                            graph_constants[tensor.name] = tensor.to_constant(
                                shape_of)
                            graph_constants[tensor.name].inputs.clear()
                except Exception as err:
                    if not error_ok:
                        raise err
                    G_LOGGER.warning("'{:}' routine failed with:\n{:}".format(
                        shape_fold_func.__name__, err))
                else:
                    graph_constants = update_foldable_outputs(graph_constants)

        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Have to use index since 'node' is not in part
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values

        # Next, evaluate the foldable variables with ONNX-Runtime

        # Only evaluate foldable values that have non-foldable outputs or are graph outputs.
        # Otherwise, if all the outputs are foldable, then we can just evaluate the outputs directly.
        def should_eval_foldable(tensor):
            non_const = not isinstance(tensor, Constant)
            is_graph_output = not tensor.outputs
            has_non_foldable_outputs = any(out.name not in graph_constants
                                           for out in tensor.outputs)
            return non_const and (is_graph_output or has_non_foldable_outputs)

        graph_clone.outputs = [
            t for t in graph_constants.values() if should_eval_foldable(t)
        ]
        G_LOGGER.debug("Folding tensors: {:}".format(graph_clone.outputs))
        graph_clone.cleanup(remove_unused_graph_inputs=True)

        # Using ._values avoids a deep copy of the values.
        constant_values = {
            name: tensor._values
            for name, tensor in graph_constants.items()
            if isinstance(tensor, Constant)
        }
        if graph_clone.outputs:
            if partitioning:
                constant_values.update(partition_and_infer(graph_clone))
            else:
                names = [t.name for t in graph_clone.outputs]
                try:
                    sess = rt.InferenceSession(
                        export_onnx(graph_clone,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed. You may want to try enabling partitioning to see better results. "
                        "Note: Error was:\n{:}".format(err))
                    G_LOGGER.verbose(
                        "Note: Graph was:\n{:}".format(graph_clone))
                    if not error_ok:
                        raise
        elif not constant_values:
            G_LOGGER.info(
                "Could not find any nodes in this graph ({:}) that can be folded. "
                "This could mean that constant folding has already been run on this graph. "
                "Skipping.".format(self.name))

        # Finally, replace the Variables in the original graph with constants.
        if constant_values:
            graph_tensors = self.tensors()
            for name, values in constant_values.items():
                tensor = graph_tensors[name]
                if not isinstance(tensor, Constant):
                    tensor.to_constant(values)
                    tensor.inputs.clear()  # Constants do not need inputs

        # Folding subgraphs after the outer graph can lead to better folding.
        def fold_subgraphs():
            for node in self.nodes:
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        attr.fold_constants(fold_shapes=fold_shapes,
                                            partitioning=partitioning)

        if recurse_subgraphs:
            fold_subgraphs()

        return self
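
As a small end-to-end illustration of the shape-folding pass above (Pass 2, fold_shape_gather), the sketch below builds a toy graph in which a Gather reads one statically known dimension out of a Shape result. All tensor and node names are hypothetical; onnxruntime must be installed because fold_constants() imports it, even though this particular fold needs no inference:

import numpy as np
import onnx_graphsurgeon as gs

# Input with a dynamic batch dimension but static channel/spatial dimensions.
inp = gs.Variable("input", dtype=np.float32, shape=("batch", 3, 224, 224))
shape_out = gs.Variable("shape_out", dtype=np.int64)
channels = gs.Variable("channels", dtype=np.int64)
indices = gs.Constant("indices", values=np.array(1, dtype=np.int64))

nodes = [
    gs.Node(op="Shape", inputs=[inp], outputs=[shape_out]),
    gs.Node(op="Gather", inputs=[shape_out, indices], outputs=[channels]),
]
graph = gs.Graph(nodes=nodes, inputs=[inp], outputs=[channels])

graph.toposort().fold_constants().cleanup()

# 'channels' should now be a Constant holding 3: only dimension 1 was gathered,
# so the dynamic batch dimension does not block folding.
print(graph.outputs[0])
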
Example #3
    def fold_constants(self,
                       fold_shapes=True,
                       recurse_subgraphs=True,
                       partitioning=None,
                       error_ok=True):
        """
        Folds constants in-place in the graph. The graph must be topologically sorted prior to
        calling this function (see `toposort()`).

        This function will not remove constants after folding them. In order to get rid of
        these hanging nodes, you can run the `cleanup()` function.

        *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
        and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

        Args:
            fold_shapes (bool):
                    Whether to fold `Shape` nodes in the graph.
                    This requires shapes to be inferred in the graph, and can only fold
                    static shapes.
                    Defaults to True.
            recurse_subgraphs (bool):
                    Whether to recursively fold constants in subgraphs.
                    Defaults to True.
            partitioning (Union[str, None]):
                    Whether/How to partition the graph so that errors in folding one
                    part of a model do not affect other parts. Available modes are:

                    - None: Do not partition the graph. If inference fails, no constants are folded.
                    - "basic": Partition the graph. If inference fails in one partition, other partitions will
                            remain unaffected.
                    - "recursive": Parition the graph recursively. If inference fails in a partition, the partition
                            will be further paritioned.

                    Defaults to None.
            error_ok (bool):
                    Whether inference errors should be suppressed.
                    When this is False, any errors encountered during inference will be re-raised.
                    Defaults to True.

        Returns:
            self
        """
        import onnxruntime as rt
        from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

        PARTITIONING_MODES = [None, "basic", "recursive"]
        if partitioning not in PARTITIONING_MODES:
            G_LOGGER.critical(
                "Argument for parameter 'partitioning' must be one of: {:}".
                format(PARTITIONING_MODES))

        G_LOGGER.debug("Folding constants in {:}".format(self.name))

        graph_clone = self.copy()
        clone_tensors = graph_clone.tensors()

        # We find graph constants in two passes:
        # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
        # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
        #    and turns them into Constants iff the input has a statically known shape.

        def update_foldable_outputs(graph_constants):
            def is_foldable(node):
                def all_tensors_const(tensors):
                    return all([t.name in graph_constants for t in tensors])

                if not all_tensors_const(node.inputs):
                    return False

                all_subgraph_foreign_tensors_const = True
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        foreign_tensors = attr._foreign_tensors().values()
                        all_subgraph_foreign_tensors_const &= all_tensors_const(
                            foreign_tensors)
                return all_subgraph_foreign_tensors_const

            # Walks along the outputs of graph_constants to see if they can also be computed statically.
            # Since the graph is topologically sorted, this should find all constant nodes in the graph.
            for node in graph_clone.nodes:
                if is_foldable(node):
                    graph_constants.update(
                        {out.name: out
                         for out in node.outputs})
            return graph_constants

        # Pass 1: Non-shape Constant Folding

        graph_constants = {
            name: tensor
            for name, tensor in clone_tensors.items()
            if isinstance(tensor, Constant)
        }

        # Replaces outputs of Constant nodes with constant tensors
        for tensor in clone_tensors.values():
            if len(tensor.inputs) == 1:
                node = tensor.inputs[0]
                if node.op == "Constant":
                    graph_constants[tensor.name] = tensor.to_constant(
                        node.attrs["value"]._values
                    )  # Using ._values avoids copying
                    graph_constants[tensor.name].inputs.clear()

        graph_constants = update_foldable_outputs(graph_constants)

        # Pass 2: Shape Folding

        def get_producer(tensor, op):
            """
            Get the producer of the specified tensor iff it matches op
            """
            if len(tensor.inputs) != 1:
                return None

            node = tensor.inputs[0]
            if node.op != op:
                return None
            return node

        def get_input(node, index=0):
            """
            Get the input tensor of a node iff the input tensor is not already marked a graph constant.
            """
            if node is None:
                return None

            inp = node.inputs[index]

            # If the input was already found to be a constant, it will be folded anyway.
            if inp.name in graph_constants:
                return None

            return inp

        def handle_shape(tensor):
            inp = get_input(get_producer(tensor, "Shape"))
            if inp is None:
                return None

            if inp.shape is None or misc.is_dynamic_shape(inp.shape):
                return None
            return np.array(inp.shape, dtype=np.int64)

        def handle_shape_gather(tensor):
            gather = get_producer(tensor, "Gather")
            if gather is None:
                return None

            data = gather.inputs[0]
            indices_tensor = gather.inputs[1]

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            if not isinstance(indices_tensor, Constant):
                return None

            indices = indices_tensor.values
            if not indices.shape:  # Scalar-case
                shape = inp.shape[int(indices)]
                if misc.is_dynamic_dimension(shape):
                    return None
            else:
                shape = [inp.shape[index] for index in indices]
                if misc.is_dynamic_shape(shape):
                    return None

            return np.array(shape, dtype=np.int64)

        # Finds the static shape of a shape node output if possible, otherwise returns None.
        def lower_shape(tensor):
            SHAPE_FOLD_FUNCS = [handle_shape, handle_shape_gather]
            for fold_func in SHAPE_FOLD_FUNCS:
                shape = fold_func(tensor)
                if shape is not None:
                    return shape

        if fold_shapes:
            for tensor in clone_tensors.values():
                shape_of = lower_shape(tensor)

                if shape_of is not None:
                    G_LOGGER.ultra_verbose(
                        "Folding shape tensor: {:} to: {:}".format(
                            tensor.name, shape_of))
                    graph_constants[tensor.name] = tensor.to_constant(shape_of)
                    graph_constants[tensor.name].inputs.clear()

            graph_constants = update_foldable_outputs(graph_constants)

        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Have to use index since 'node' is not in part
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values

        # Next, evaluate the foldable variables with ONNX-Runtime
        graph_clone.outputs = [
            t for t in graph_constants.values() if not isinstance(t, Constant)
        ]
        graph_clone.cleanup(remove_unused_graph_inputs=True)

        # Using ._values avoids a deep copy of the values.
        constant_values = {
            name: tensor._values
            for name, tensor in graph_constants.items()
            if isinstance(tensor, Constant)
        }
        if graph_clone.outputs:
            if partitioning:
                constant_values.update(partition_and_infer(graph_clone))
            else:
                names = [t.name for t in graph_clone.outputs]
                try:
                    sess = rt.InferenceSession(
                        export_onnx(graph_clone,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed. You may want to try enabling partitioning to see better results. "
                        "Note: Error was:\n{:}".format(err))
                    G_LOGGER.verbose(
                        "Note: Graph was:\n{:}".format(graph_clone))
                    if not error_ok:
                        raise
        elif not constant_values:
            G_LOGGER.info(
                "Could not find any nodes in this graph ({:}) that can be folded. "
                "This could mean that constant folding has already been run on this graph. "
                "Skipping.".format(self.name))

        # Finally, replace the Variables in the original graph with constants.
        if constant_values:
            graph_tensors = self.tensors()
            for name, values in constant_values.items():
                tensor = graph_tensors[name]
                if not isinstance(tensor, Constant):
                    tensor.to_constant(values)
                    tensor.inputs.clear()  # Constants do not need inputs

        # Folding subgraphs after the outer graph can lead to better folding.
        def fold_subgraphs():
            for node in self.nodes:
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        attr.fold_constants(fold_shapes=fold_shapes,
                                            partitioning=partitioning)

        if recurse_subgraphs:
            fold_subgraphs()

        return self
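
Both versions of fold_constants() above expose the same error-handling knobs. A brief sketch of how they are typically combined, assuming graph is an already-imported, topologically sorted onnx-graphsurgeon Graph:

# Default behaviour: inference failures are logged as warnings and folding
# continues with whatever could still be computed.
graph.fold_constants(error_ok=True)

# error_ok=False re-raises the first inference failure to the caller.
try:
    graph.fold_constants(error_ok=False)
except Exception as err:
    print("Constant folding failed:", err)

# partitioning="recursive" keeps splitting a failing partition so that only the
# nodes that truly cannot be evaluated are left unfolded.
graph.fold_constants(partitioning="recursive")
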
Example #4
    def cleanup(self,
                remove_unused_node_outputs=False,
                recurse_subgraphs=True,
                remove_unused_graph_inputs=False):
        """
        Removes unused nodes and tensors from the graph.
        A node or tensor is considered unused if it does not contribute to any of the graph outputs.

        Additionally, any producer nodes of graph input tensors, as well as consumer nodes of graph output
        tensors that are not in the graph, are removed from the graph.

        *Note: This function will never modify graph output tensors.*

        Args:
            remove_unused_node_outputs (bool): Whether to remove unused output tensors of nodes. This will never remove
                empty-tensor (i.e. optional, but omitted) outputs. Defaults to False.
            recurse_subgraphs (bool):
                    Whether to recursively cleanup subgraphs.
                    Defaults to True.
            remove_unused_graph_inputs (bool):
                    Whether to remove unused graph inputs.
                    Defaults to False.

        Returns:
            self
        """
        def cleanup_subgraphs():
            for node in self.nodes:
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        attr.cleanup(
                            remove_unused_node_outputs=
                            remove_unused_node_outputs,
                            remove_unused_graph_inputs=
                            remove_unused_graph_inputs,
                        )

        if recurse_subgraphs:
            cleanup_subgraphs()

        G_LOGGER.verbose("Cleaning up {:}".format(self.name))

        with self.node_ids():
            # Graph input producers must be removed first so used_node_ids is correct.
            for inp in self.inputs:
                inp.inputs.clear()

            used_node_ids, used_tensors = self._get_used_node_ids()

            inputs = []
            for inp in self.inputs:
                if inp in used_tensors or not remove_unused_graph_inputs:
                    inputs.append(inp)
                else:
                    G_LOGGER.ultra_verbose(
                        "Removing unused input: {:}".format(inp))
            self.inputs = inputs

            nodes = []
            for node in self.nodes:
                if self._get_node_id(node) in used_node_ids:
                    nodes.append(node)
                else:
                    node.inputs.clear()
                    node.outputs.clear()
                    G_LOGGER.ultra_verbose(
                        "Removing unused node: {:}".format(node))

            # Remove any hanging tensors - tensors without outputs
            if remove_unused_node_outputs:
                graph_output_names = set(
                    [tensor.name for tensor in self.outputs])
                for node in nodes:

                    def is_hanging_tensor(tensor):
                        return (not tensor.is_empty()
                                and len(tensor.outputs) == 0
                                and tensor.name not in graph_output_names)

                    to_remove = [
                        out for out in node.outputs if is_hanging_tensor(out)
                    ]
                    for out in to_remove:
                        if out in node.outputs:
                            node.outputs.remove(out)

            self.nodes = nodes
            return self
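
A short usage sketch for cleanup(), assuming graph is an onnx-graphsurgeon Graph; the tensor name below is hypothetical:

# Truncate the graph at a chosen intermediate tensor, then drop every node and
# tensor that no longer contributes to the graph outputs.
graph.outputs = [graph.tensors()["intermediate_tensor"]]
graph.cleanup()

# More aggressive cleanup: also remove graph inputs and per-node output tensors
# that nothing consumes. Graph output tensors are never touched.
graph.cleanup(remove_unused_node_outputs=True,
              remove_unused_graph_inputs=True)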