def create_quantize_graph(config_file, graph=None):
    """
    Quantize the whole graph.
    :param config_file: path to the quantization configuration file
    :param graph: the graph to quantize (defaults to the current default graph)
    :return: the graph with quantization ops added
    """
    if graph is None:
        graph = tf.get_default_graph()
    config = Config(config_file)
    ops = graph.get_operations()
    quant_tensors = {}  # tensors that have already been quantized, to avoid quantizing them twice
    quantize_func = quantize_for_train if config.is_training else quantize_for_eval
    quant_layers = set(tf.get_collection("quant_layers"))
    tf.logging.info(
        "-------------------------Quantize-------------------------")

    # Iterate over all ops and quantize the forward ops
    for op in ops:
        if "gradient" not in op.name and op.type in config.forward_quant_ops and op.name not in quant_layers:
            inp1, inp2 = op.inputs._inputs
            tf.logging.info("Forward Quant:%s" % op.name)
            ctxt = util.GetOutputContext(op)
            while_ctxt = util.GetContainingWhileContext(ctxt)
            graph._set_control_flow_context(ctxt)
            if inp1.name not in quant_tensors:
                quant_tensors[inp1.name] = quantize_func(
                    inp1, config.get_config(op.type, op.name, "input"))
            if inp2.name not in quant_tensors:
                quant_tensors[inp2.name] = quantize_func(
                    inp2, config.get_config(op.type, op.name, "weight"))
            quant_inp1 = quant_tensors[inp1.name]
            quant_inp2 = quant_tensors[inp2.name]
            tf.contrib.graph_editor.reroute_ts([quant_inp1, quant_inp2],
                                               op.inputs._inputs,
                                               can_modify=op)
            tf.add_to_collection("quant_layers", op.name)

    # Iterate over all ops and quantize the backward (gradient) ops
    for op in ops:
        if "gradient" in op.name and op.type in config.backward_quant_ops and op.name not in quant_layers:
            tf.logging.info("Backward Quant:%s" % op.name)
            new_inputs = []
            for inp in op.inputs._inputs:
                if "ShapeN" in inp.name:
                    new_inputs.append(inp)
                    continue
                if inp.name not in quant_tensors:
                    quant_tensors[inp.name] = quantize_func(
                        inp, config.get_config("Gradient", op.name, "gradient"))
                new_inputs.append(quant_tensors[inp.name])
            tf.contrib.graph_editor.reroute_ts(new_inputs,
                                               op.inputs._inputs,
                                               can_modify=op)
            tf.add_to_collection("quant_layers", op.name)

    graph._set_control_flow_context(None)
    return graph
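
A minimal usage sketch (the model-building helper build_model and the config path "quant_config.json" are assumptions, not part of the example): build the forward and backward graph first so the "gradient" ops exist, then rewrite the default graph in place.

# Hypothetical usage sketch; build_model and "quant_config.json" are assumed names.
import tensorflow as tf

def build_quantized_train_graph(config_file="quant_config.json"):
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    loss = tf.reduce_mean(build_model(images))  # assumed model-building helper
    # Create the optimizer first so the backward ("gradient") ops are in the
    # graph, then rewrite both forward and backward ops in place.
    train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(loss)
    create_quantize_graph(config_file, graph=tf.get_default_graph())
    return train_op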
Example No. 2
def _MergeGrad(op, grad, _):
    """Gradients for a Merge op are calculated using a Switch op."""
    input_op = op.inputs[0].op
    graph = ops.get_default_graph()
    # pylint: disable=protected-access
    op_ctxt = control_flow_util.GetOutputContext(input_op)
    grad_ctxt = graph._get_control_flow_context()
    # pylint: enable=protected-access
    if isinstance(op_ctxt, WhileContext):
        # pylint: disable=protected-access
        return control_flow_ops._SwitchRefOrTensor(grad, grad_ctxt.pivot)
        # pylint: enable=protected-access
    elif isinstance(op_ctxt, CondContext):
        pred = op_ctxt.pred
        if grad_ctxt and grad_ctxt.grad_state:
            # This Merge node is part of a cond within a loop.
            # The backprop needs to have the value of this predicate for every
            # iteration. So we must have its values accumulated in the forward, and
            # use the accumulated values as the predicate for this backprop switch.
            grad_state = grad_ctxt.grad_state
            real_pred = grad_state.history_map.get(pred.name)
            if real_pred is None:
                # Remember the value of pred for every iteration.
                grad_ctxt = grad_state.grad_context
                grad_ctxt.Exit()
                history_pred = grad_state.AddForwardAccumulator(pred)
                grad_ctxt.Enter()

                # Add the stack pop op. If pred.op is in a (outer) CondContext,
                # the stack pop will be guarded with a switch.
                real_pred = grad_state.AddBackpropAccumulatedValue(
                    history_pred, pred)
                grad_state.history_map[pred.name] = real_pred
            pred = real_pred
        # pylint: disable=protected-access
        return control_flow_ops._SwitchRefOrTensor(grad,
                                                   pred,
                                                   name="cond_grad")
        # pylint: enable=protected-access
    else:
        num_inputs = len(op.inputs)
        cond = [math_ops.equal(op.outputs[1], i) for i in xrange(num_inputs)]
        # pylint: disable=protected-access
        return [
            control_flow_ops._SwitchRefOrTensor(grad, cond[i])[1]
            for i in xrange(num_inputs)
        ]
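
As a quick illustration (a TF 1.x graph-mode sketch, not part of the original source), taking tf.gradients through a tf.cond exercises exactly this path: the Merge op's gradient is routed back through a Switch on the same predicate, named with the "cond_grad" suffix used above.

# Sketch: gradients through tf.cond go through _MergeGrad's "cond_grad" Switch.
import tensorflow as tf

x = tf.placeholder(tf.float32, [])
pred = tf.placeholder(tf.bool, [])
y = tf.cond(pred, lambda: tf.square(x), lambda: 3.0 * x)  # cond output is a Merge op
(dx,) = tf.gradients(y, x)
# The backward graph now contains Switch ops (look for names containing
# "cond_grad") that forward the incoming gradient only to the taken branch.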
Example No. 3
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
    """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    Tensors representing the partial gradients with respect to each input
    of the op.

  Raises:
    ValueError: if concat_dim/axis is not statically known.
  """
    def _CreateDenseMaskAndBegin(sizes, concat_dim):
        """Create variables for iteratively slicing a dense gradients tensor."""
        # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
        shape_of_shape = array_ops.shape(sizes[0])
        # Make a vector of length equal to the input's dimensions,
        # with 0's everywhere and 1 in the concat dim position.
        # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
        mask = array_ops.concat([
            array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
            array_ops.fill(shape_of_shape - concat_dim - 1, 0)
        ], 0)
        begin = array_ops.fill(shape_of_shape, 0)
        return mask, begin

    def _ExtractInputShapes(inputs):
        """Extract the shapes of a set of input tensors."""
        if context.executing_eagerly():
            return array_ops.shape_n(inputs)
        sizes = []
        fully_known = True
        for x in inputs:
            input_shape = array_ops.shape(x)
            if not isinstance(input_shape,
                              ops.Tensor) or input_shape.op.type != "Const":
                fully_known = False
                break
            sizes.append(input_shape)

        if fully_known:
            return sizes
        else:
            return array_ops.shape_n(inputs)

    # Degenerate concatenation, just return grad.
    if len(op.inputs) == 2:
        return grad + [None] if end_value_index <= dim_index else [None] + grad

    concat_dim = op.inputs[dim_index]
    input_values = op.inputs[start_value_index:end_value_index]

    out_grads = []
    if isinstance(grad, ops.Tensor):
        if context.executing_eagerly():
            # Using mod here for convenience since concat_dim is already verified
            # in concat implementation to be within the allowed [-rank, rank) range.
            non_neg_concat_dim = (concat_dim._numpy().item(0) %
                                  input_values[0]._rank())  # pylint: disable=protected-access
            # All inputs are guaranteed to be EagerTensors in eager mode
            sizes = pywrap_tensorflow.TFE_Py_TensorShapeSlice(
                input_values, non_neg_concat_dim)
            out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
        else:
            if constant_op.is_constant(concat_dim):
                # If concat_dim is a constant defined in a different context,
                # then we duplicate it in the current context to avoid passing it
                # through an Enter node.
                # This is a small optimization in general, but it is required when
                # compiling with XLA, as XLA needs the concat input to be folded into a
                # constant.
                grad_context = control_flow_util.GetOutputContext(grad.op)
                dim_context = control_flow_util.GetOutputContext(concat_dim.op)
                if dim_context != grad_context:
                    value = tensor_util.constant_value(concat_dim)
                    concat_dim = constant_op.constant(value=value,
                                                      dtype=concat_dim.dtype)

            # Using mod here for convenience since concat_dim is already verified
            # in concat implementation to be within the allowed [-rank, rank) range.
            non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

            # Get the inputs' tensor shapes
            sizes = _ExtractInputShapes(input_values)
            # The magic number of 16 was found through benchmarking a range of sizes
            # on CPUs and a Maxwell TitanX.  A speedup was seen in a large majority of
            # cases when switching implementations at N=16, but it is possible that
            # there will be a small number of performance regressions.
            if len(sizes) > 16:
                # extract the size of each input along the concat dimension
                sizes = array_ops.squeeze(
                    array_ops.slice(array_ops.stack(sizes, axis=1),
                                    [non_neg_concat_dim, 0], [1, -1]))
                out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
            else:
                offset = gen_array_ops.concat_offset(non_neg_concat_dim, sizes)
                for (begin, size) in zip(offset, sizes):
                    out_grads.append(array_ops.slice(grad, begin, size))
    elif isinstance(grad, ops.IndexedSlices):
        # Using mod here for convenience since concat_dim is already verified
        # in concat implementation to be within the allowed [-rank, rank) range.
        non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])
        concat_dim_static = tensor_util.constant_value(concat_dim)
        if concat_dim_static is None:
            raise ValueError("Can only compute IndexedSlices gradient with "
                             "statically-known concat_dim")
        if concat_dim_static < 0:
            rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
            if rank is None:
                raise ValueError(
                    "Can only compute IndexedSlices gradient with "
                    "negative concat_dim when first value rank is "
                    "statically-known.")
            concat_dim_static %= rank
        # Get the inputs' tensor shapes
        sizes = [array_ops.shape(x) for x in input_values]
        if concat_dim_static > 0:
            # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
            # gradients with all the indices, but with grad.values sliced accordingly.
            # This is like the Tensor case, except shape(grad.values)[0] is not equal
            # to shape(sizes[i])[0], since only a subset of the dim-0 values are
            # stored.
            mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
            for size in sizes:
                new_values = array_ops.slice(
                    grad.values, begin,
                    array_ops.concat(
                        [[-1], array_ops.slice(size, [1], [-1])], 0))
                out_grads.append(
                    ops.IndexedSlices(new_values, grad.indices, size))
                # Lint complains begin = begin + ...
                begin = math_ops.add(begin, size * mask)
        else:
            # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
            # only for the relevant indices.
            start = constant_op.constant(0, dtype=grad.indices.dtype)
            for size in sizes:
                size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
                if size_concat_dim.dtype != grad.indices.dtype:
                    size_concat_dim = math_ops.cast(size_concat_dim,
                                                    dtype=grad.indices.dtype)
                end = start + size_concat_dim
                # Compute the 1-D Tensor of indices relevant for this input.
                indices_to_select = array_ops.squeeze(array_ops.where(
                    math_ops.logical_and(grad.indices >= start,
                                         grad.indices < end)),
                                                      axis=[1])
                new_indices = array_ops.gather(grad.indices,
                                               indices_to_select) - start
                new_values = array_ops.gather(grad.values, indices_to_select)
                out_grads.append(
                    ops.IndexedSlices(new_values, new_indices, size))
                start = end
    else:
        raise TypeError("Expected Tensor or IndexedSlices, got %s" %
                        type(grad))

    return (out_grads + [None] if end_value_index <= dim_index else [None] +
            out_grads)
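
For the common dense-Tensor case, the net effect is simply splitting the incoming gradient along the concat axis by each input's size; a small sketch (shapes chosen arbitrarily):

# Sketch of the dense case: concat's gradient is tf.split along the concat axis.
import tensorflow as tf

a = tf.placeholder(tf.float32, [None, 2])
b = tf.placeholder(tf.float32, [None, 5])
c = tf.concat([a, b], axis=1)
grad_c = tf.ones_like(c)                           # stand-in for the incoming gradient
grad_a, grad_b = tf.split(grad_c, [2, 5], axis=1)  # what _ConcatGradHelper computes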
Example No. 4
    def AddForwardAccumulator(self, value, dead_branch=False):
        """Add an accumulator for each forward tensor that is needed in backprop.

    This is added to the forward loop at the first time when a tensor
    in the forward loop is used by backprop gradient computation loop.
    We create an accumulator that accumulates the value of tensor at each
    iteration. Called in the control flow context where gradients() is called.

    The pseudocode is:
    ```
      acc = stack();
      while (_pivot) {
        acc = stack_push(acc, value);
      }
    ```

    We make sure that the stack push op in one iteration is executed before
    the next iteration. This is achieved by adding a control edge from
    `forward_index.op.inputs[0].op` to the push op, and another control
    edge from the push op to either `forward_index.op` or `forward_sync`.

    Args:
      value: The source tensor in forward that is to be accumulated.
      dead_branch: True iff the tensor is on a dead branch of a cond.

    Returns:
      The stack that contains the accumulated history of the tensor.

    Raises:
      TypeError: For internal errors involving the value condition context.
      ValueError: If `value` is inside an XLA scope and a valid max size
        for the stack can't be found.
    """
        # curr_ctxt is the context that tf.gradients was called in.
        with self._forward_index.graph.as_default():
            curr_ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
            with ops.control_dependencies(None):
                if curr_ctxt:
                    curr_ctxt.Enter()
                with ops.colocate_with(value):
                    # We only need to pass maximum_iterations to the stack if
                    # we're inside an XLA context.
                    if not util.IsInXLAContext(value.op):
                        max_size = constant_op.constant(-1, dtypes.int32)
                    else:
                        max_size = _GetMaxSizeFromNestedMaximumIterations(
                            value, self.forward_context)
                    acc = gen_data_flow_ops.stack_v2(
                        max_size=max_size,
                        elem_type=value.dtype.base_dtype,
                        name="f_acc")
                if curr_ctxt:
                    curr_ctxt.Exit()

                # Make acc available in the forward context.
                enter_acc = self.forward_context.AddValue(acc)

                # Add the stack_push op in the context of value.op.
                swap_enabled = self.forward_context.swap_memory
                value_ctxt = util.GetOutputContext(value.op)
                if value_ctxt == self.forward_context:
                    # value is not nested in the forward context.
                    self.forward_context.Enter()
                    push = gen_data_flow_ops.stack_push_v2(
                        enter_acc, value, swap_memory=swap_enabled)
                    self.forward_context.Exit()
                    # Protect stack push and order it before forward_index.
                    self.forward_index.op._add_control_input(push.op)
                else:
                    # value is in a cond context within the forward context.
                    if not isinstance(value_ctxt,
                                      control_flow_ops.CondContext):
                        raise TypeError("value_ctxt is not a CondContext: %s" %
                                        value_ctxt)
                    if dead_branch:
                        # The special case for creating a zero tensor for a dead
                        # branch of a switch. See _ControlFlowState.ZerosLike().
                        value_ctxt.outer_context.Enter()
                        push = gen_data_flow_ops.stack_push_v2(
                            enter_acc, value, swap_memory=swap_enabled)
                        value_ctxt.outer_context.Exit()
                        push.op._set_control_flow_context(value_ctxt)
                    else:
                        value_ctxt.Enter()
                        push = gen_data_flow_ops.stack_push_v2(
                            enter_acc, value, swap_memory=swap_enabled)
                        value_ctxt.Exit()
                    # Protect stack push and order it before forward_sync.
                    self.forward_sync._add_control_input(push.op)
                # Order stack push after the successor of forward_index
                add_op = self.forward_index.op.inputs[0].op
                push.op._add_control_input(add_op)
                return acc
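
The same accumulate-in-forward / read-in-backprop idea can be sketched with the public TensorArray API (an analogy only; the code above uses the lower-level StackV2 ops):

# Analogy using TensorArray: keep a history of a per-iteration value so later
# (e.g. backprop) code can read it back; not the internal StackV2 mechanism.
import tensorflow as tf

def accumulate_history(n):
    acc = tf.TensorArray(tf.float32, size=n)

    def body(i, acc):
        value = tf.cast(i, tf.float32) ** 2   # stand-in for the forward tensor
        return i + 1, acc.write(i, value)

    _, acc = tf.while_loop(lambda i, _: i < n, body, [0, acc])
    return acc.stack()                        # accumulated history, one entry per step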