def ZerosLikeOutsideLoop(op, index):
  """Create zeros_like for the specified output of an op."""
  val = op.outputs[index]
  if not util.IsSwitch(op):
    if val.dtype == dtypes.resource:
      return array_ops.zeros(
          gen_resource_variable_ops.variable_shape(val),
          dtype=default_gradient.get_zeros_dtype(val))
    return array_ops.zeros_like(val, optimize=False)
  else:
    op_ctxt = op._get_control_flow_context()
    if op_ctxt:
      # We are in a cond context. Use a switch to create zeros only when
      # needed.
      pred = op_ctxt.pred
      branch = op_ctxt.branch
      switch_val = control_flow_ops.switch(op.inputs[0], pred)[1 - branch]
      # An op is created along the branch taken, since control dependencies
      # apply to the whole op and not to individual tensor outputs.
      pivot = array_ops.identity(switch_val)
      if val.dtype == dtypes.resource:
        with ops.control_dependencies([pivot]):
          return array_ops.zeros(
              gen_resource_variable_ops.variable_shape(switch_val),
              dtype=default_gradient.get_zeros_dtype(val))
      zeros_shape = array_ops.shape_internal(switch_val, optimize=False)
      # Ensure ops created within array_ops.zeros are dominated by switch in
      # cond context.
      with ops.control_dependencies([pivot]):
        return array_ops.zeros(zeros_shape, dtype=val.dtype)
    else:
      return array_ops.zeros_like(val, optimize=False)
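# --- Hedged usage sketch (illustrative, not part of the source above) ---
# The resource branch builds zeros from the handle's runtime shape rather than
# calling zeros_like on the handle itself. The same idea with public TF ops,
# assuming a float32 variable `v`:
import tensorflow as tf

v = tf.Variable(tf.ones([2, 3]))
# VariableShape reads the shape recorded on the resource handle at runtime.
zeros = tf.zeros(tf.raw_ops.VariableShape(input=v.handle), dtype=tf.float32)
assert zeros.shape == (2, 3)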
def _create_grad_indexed_slices_init(grad_output_slices, forward_input):
  """Creates an IndexedSlices to pass as input to the while grad function.

  Args:
    grad_output_slices: IndexedSlices. The corresponding while grad function
      output.
    forward_input: Tensor. The corresponding input to the forward while op.

  Returns:
    Zeros IndexedSlices, created in current Graph.
  """
  assert isinstance(grad_output_slices, indexed_slices.IndexedSlices)
  assert isinstance(forward_input, ops.Tensor)
  values_out = grad_output_slices.values
  indices_out = grad_output_slices.indices

  # Create the initial values tensor.
  if values_out.shape.is_fully_defined():
    values_shape = tensor_shape.TensorShape([0] +
                                            values_out.shape.as_list()[1:])
    values = array_ops.zeros(
        values_shape, dtype=values_out.dtype, name="values_init")
  else:
    if forward_input.dtype == dtypes.resource:
      forward_shape = gen_resource_variable_ops.variable_shape(forward_input)
    else:
      forward_shape = array_ops.shape(forward_input)
    values_shape = array_ops.concat([[0], forward_shape[1:]], 0)
    values = array_ops.zeros(
        values_shape, dtype=values_out.dtype, name="values_init")

  # Create the initial indices tensor.
  indices = constant_op.constant([], indices_out.dtype, name="indices_init")

  # Create the initial dense_shape tensor. We assume it is the same shape as
  # forward_input, since captured tensors don't change shape across loop
  # iterations.
  if forward_input.dtype == dtypes.resource:
    shape = gen_resource_variable_ops.variable_shape(
        forward_input, name="shape_init")
  else:
    shape = array_ops.shape(forward_input, name="shape_init")

  return indexed_slices.IndexedSlices(
      values=values, indices=indices, dense_shape=shape)
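# --- Hedged sketch (illustrative) of the zero IndexedSlices built above ---
# An empty values tensor with zero rows, empty indices, and the dense shape of
# the forward input. Built with public API only; the names here are
# assumptions, not from the source:
import tensorflow as tf

forward_input = tf.ones([5, 3])
init = tf.IndexedSlices(
    values=tf.zeros([0, 3]),                  # zero gradient rows so far
    indices=tf.constant([], dtype=tf.int32),  # no indices accumulated yet
    dense_shape=tf.shape(forward_input))      # shape fixed across iterations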
def shape(self):
  """The shape of this variable."""
  if self._in_graph_mode:
    return tensor_shape.TensorShape(self._handle.op.get_attr("shape"))
  return tensor_shape.TensorShape(
      tensor_util.constant_value(
          gen_resource_variable_ops.variable_shape(self._handle)))
def _zeros_like(op_output):
  """Like array_ops.zeros_like() but also accepts resource var handles."""
  if op_output.dtype == dtypes.resource:
    return array_ops.zeros(
        gen_resource_variable_ops.variable_shape(op_output),
        dtype=default_gradient.get_zeros_dtype(op_output))
  return array_ops.zeros_like(op_output)
def _create_grad_indexed_slices_init(grad_output_slices, forward_input):
  """Creates an IndexedSlices to pass as input to the while grad function.

  Args:
    grad_output_slices: IndexedSlices. The corresponding while grad function
      output.
    forward_input: Tensor. The corresponding input to the forward while op.

  Returns:
    Zeros IndexedSlices, created in current Graph.
  """
  assert isinstance(grad_output_slices, ops.IndexedSlices)
  assert isinstance(forward_input, ops.Tensor)
  values_out = grad_output_slices.values
  indices_out = grad_output_slices.indices

  # Create the initial values tensor.
  if values_out.shape.is_fully_defined():
    values_shape = tensor_shape.TensorShape([0] +
                                            values_out.shape.as_list()[1:])
    values = array_ops.zeros(
        values_shape, dtype=values_out.dtype, name="values_init")
  else:
    if forward_input.dtype == dtypes.resource:
      forward_shape = gen_resource_variable_ops.variable_shape(forward_input)
    else:
      forward_shape = array_ops.shape(forward_input)
    values_shape = array_ops.concat([[0], forward_shape[1:]], 0)
    values = array_ops.zeros(
        values_shape, dtype=values_out.dtype, name="values_init")

  # Create the initial indices tensor.
  indices = constant_op.constant([], indices_out.dtype, name="indices_init")

  # Create the initial dense_shape tensor. We assume it is the same shape as
  # forward_input, since captured tensors don't change shape across loop
  # iterations.
  if forward_input.dtype == dtypes.resource:
    shape = gen_resource_variable_ops.variable_shape(
        forward_input, name="shape_init")
  else:
    shape = array_ops.shape(forward_input, name="shape_init")

  return ops.IndexedSlices(values=values, indices=indices, dense_shape=shape)
def _ResourceGatherNdGrad(op, grad):  # pylint: disable=missing-docstring
  ref = op.inputs[0]
  indices = op.inputs[1]
  ref_shape = gen_resource_variable_ops.variable_shape(ref, indices.dtype)
  if indices.shape.ndims == 2 and indices.shape.dims[-1].value == 1:
    ref_grad = ops.IndexedSlices(grad, array_ops.squeeze(indices, axis=-1),
                                 ref_shape)
  else:
    ref_grad = array_ops.scatter_nd(indices, grad, ref_shape)
  return [ref_grad, None]
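# --- Hedged sketch (illustrative) of the scatter_nd branch above ---
# For general indices, the sparse incoming gradient is scattered back into a
# dense tensor of the variable's shape. The values below are made up:
import tensorflow as tf

ref_shape = tf.constant([4, 2])        # shape of the gathered-from variable
indices = tf.constant([[0], [2]])      # gather_nd indices
grad = tf.ones([2, 2])                 # incoming gradient for gathered rows
dense_grad = tf.scatter_nd(indices, grad, ref_shape)  # rows 0 and 2 get grads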
def _GatherGrad(op, grad):
  """Gradient for gather op."""
  # Build appropriately shaped IndexedSlices
  handle = op.inputs[0]
  indices = op.inputs[1]
  params_shape = gen_resource_variable_ops.variable_shape(handle)
  size = array_ops.expand_dims(array_ops.size(indices), 0)
  values_shape = array_ops.concat([size, params_shape[1:]], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, size)
  return (ops.IndexedSlices(values, indices, params_shape), None)
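# --- Hedged usage sketch (illustrative) ---
# From user code, the effect of this gradient function is that gathering from
# a variable yields a sparse IndexedSlices gradient rather than a dense one:
import tensorflow as tf

v = tf.Variable(tf.ones([4, 2]))
with tf.GradientTape() as tape:
  y = tf.gather(v, [0, 2])
g = tape.gradient(y, v)          # IndexedSlices, not a dense tensor
assert isinstance(g, tf.IndexedSlices)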
def _grad_fn(func_graph, grads):
  """The gradient function for each conditional branch.

  This function builds the gradient graph of the corresponding forward-pass
  conditional branch in `func_graph`. This is done by differentiating
  func_graph's outputs w.r.t. its inputs.

  Args:
    func_graph: FuncGraph. The corresponding forward-pass function.
    grads: The list of input gradient Tensors.

  Returns:
    The output gradient Tensors.
  """
  # Filter out untrainable function outputs.
  # NOTE(skyewm): If we don't do this, the untrainable tensors can sometimes
  # cause _GradientsHelper to raise an exception (e.g. the implementation
  # doesn't expect 'ys' to contain boolean tensors).
  assert len(func_graph.outputs) == len(grads)
  ys = []
  grad_ys = []
  for y, grad_y in zip(func_graph.outputs, grads):
    if not backprop_util.IsTrainable(y):
      continue
    ys.append(y)
    grad_ys.append(grad_y)

  # Build the gradient graph. Note that this builds the gradient computation
  # of func_graph in the current graph, which requires capturing tensors from
  # func_graph. The captured func_graph tensors are resolved to external
  # tensors in _resolve_grad_inputs.
  result = gradients_util._GradientsHelper(
      ys, func_graph.inputs, grad_ys=grad_ys, src_graph=func_graph)

  # Functions can't return None; replace Nones with zero tensors.
  # TODO(b/80444525): don't return anything here and make _IfGrad return None
  # if both branches have zero gradient.
  for i in range(len(result)):
    if result[i] is None:
      if func_graph.inputs[i].dtype == dtypes.resource:
        result[i] = array_ops.zeros(
            gen_resource_variable_ops.variable_shape(func_graph.inputs[i]),
            dtype=default_gradient.get_zeros_dtype(func_graph.inputs[i]))
      else:
        result[i] = array_ops.zeros_like(func_graph.inputs[i])

  return result
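# --- Hedged usage sketch (illustrative) ---
# _grad_fn is what differentiates each cond branch internally; from user code
# the visible effect is simply that tf.cond is differentiable end to end:
import tensorflow as tf

x = tf.Variable(2.0)
with tf.GradientTape() as tape:
  y = tf.cond(x > 0, lambda: x * x, lambda: -x)
print(tape.gradient(y, x))       # 4.0, from the taken (x * x) branch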
def _GatherGrad(op, grad):
  """Gradient for gather op."""
  # Build appropriately shaped IndexedSlices
  handle = op.inputs[0]
  indices = op.inputs[1]
  if context.in_graph_mode():
    # Walk graph back until the original handle is found.
    # TODO(apassos): implement this for EAGER mode.
    while handle.op.type != "VarHandleOp":
      handle = handle.op.inputs[0]
  params_shape = gen_resource_variable_ops.variable_shape(handle)
  size = array_ops.expand_dims(array_ops.size(indices), 0)
  values_shape = array_ops.concat([size, params_shape[1:]], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, size)
  return (ops.IndexedSlices(values, indices, params_shape), None)
def _grad_fn(func_graph, grads):
  """The gradient function for each conditional branch.

  This function builds the gradient graph of the corresponding forward-pass
  conditional branch in `func_graph`. This is done by differentiating
  func_graph's outputs w.r.t. its inputs.

  Args:
    func_graph: FuncGraph. The corresponding forward-pass function.
    grads: The list of input gradient Tensors.

  Returns:
    The output gradient Tensors.
  """
  # Filter out untrainable function outputs.
  # NOTE(skyewm): If we don't do this, the untrainable tensors can sometimes
  # cause _GradientsHelper to raise an exception (e.g. the implementation
  # doesn't expect 'ys' to contain boolean tensors).
  assert len(func_graph.outputs) == len(grads)
  ys = []
  grad_ys = []
  for y, grad_y in zip(func_graph.outputs, grads):
    if not gradients_impl.IsTrainable(y):
      continue
    ys.append(y)
    grad_ys.append(grad_y)

  # Build the gradient graph. Note that this builds the gradient computation
  # of func_graph in the current graph, which requires capturing tensors from
  # func_graph. The captured func_graph tensors are resolved to external
  # tensors in _resolve_grad_inputs.
  result = gradients_impl._GradientsHelper(
      ys, func_graph.inputs, grad_ys=grad_ys, src_graph=func_graph)

  # Functions can't return None; replace Nones with zero tensors.
  # TODO(b/80444525): don't return anything here and make _IfGrad return None
  # if both branches have zero gradient.
  for i in range(len(result)):
    if result[i] is None:
      if func_graph.inputs[i].dtype == dtypes.resource:
        result[i] = array_ops.zeros(
            gen_resource_variable_ops.variable_shape(func_graph.inputs[i]))
      else:
        result[i] = array_ops.zeros_like(func_graph.inputs[i])

  return result
def _GatherGrad(op, grad):
  """Gradient for gather op."""
  # Build appropriately shaped IndexedSlices
  # Walk graph back until the original handle is found.
  # TODO(apassos): more robust way of getting the shape.
  # TODO(apassos): implement this for EAGER mode.
  if context.in_eager_mode():
    dense_shape = gen_resource_variable_ops.variable_shape(op.inputs[0])
    return (ops.IndexedSlices(grad, op.inputs[1], dense_shape=dense_shape),
            None)
  handle = op.inputs[0]
  while handle.op.type != "VarHandleOp":
    handle = handle.op.inputs[0]
  params_shape = ops.convert_to_tensor(
      tensor_shape.TensorShape(handle.op.get_attr("shape")))
  indices = op.inputs[1]
  size = array_ops.expand_dims(array_ops.size(indices), 0)
  values_shape = array_ops.concat([size, params_shape[1:]], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, size)
  return [ops.IndexedSlices(values, indices, params_shape), None]
def _ZerosLikeV2(op, index):
  """Branch of ZerosLike for TF2."""
  val = op.outputs[index]
  if val.dtype == dtypes.resource:
    return array_ops.zeros(
        gen_resource_variable_ops.variable_shape(val),
        dtype=default_gradient.get_zeros_dtype(val))
  if (isinstance(val.op.graph, control_flow_v2_func_graphs.WhileBodyFuncGraph)
      and val.dtype != dtypes.variant):
    # In while_v2 we do not want to add a `ZerosLike` op because that will
    # trigger accumulation of `val`. Normally `ZerosLike` is preferred because
    # it helps avoid creating extra nodes (possibly Consts) for the shape.
    # For variants, we must use ZerosLike.
    if val.shape.is_fully_defined():
      return constant_op.constant(0, shape=val.shape.dims, dtype=val.dtype)
    else:
      # Note: Even though we add `Shape` in the default graph, while_v2 is
      # smart enough to place it in the forward graph, i.e. `val.graph`.
      zeros_shape = array_ops.shape_internal(val, optimize=False)
      return array_ops.zeros(zeros_shape, val.dtype)
  else:
    return array_ops.zeros_like(val, optimize=False)
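# --- Hedged sketch (illustrative) of the two shape paths above ---
# With a fully defined static shape, a constant suffices and no extra ops are
# needed; otherwise the shape must be read at runtime:
import tensorflow as tf

val = tf.ones([2, 3])
if val.shape.is_fully_defined():
  z = tf.constant(0, shape=val.shape, dtype=val.dtype)  # static-shape path
else:
  z = tf.zeros(tf.shape(val), val.dtype)                # dynamic-shape path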
def _zeros_like(op_output):
  """Like array_ops.zeros_like() but also accepts resource var handles."""
  if op_output.dtype == dtypes.resource:
    return array_ops.zeros(
        gen_resource_variable_ops.variable_shape(op_output))
  return array_ops.zeros_like(op_output)
def grad(dresult):
  return ops.IndexedSlices(
      dresult,
      indices,
      dense_shape=gen_resource_variable_ops.variable_shape(resource))
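# --- Hedged sketch (illustrative) of what this closure produces ---
# The backward function wraps the incoming gradient as IndexedSlices whose
# dense_shape is read from the resource handle. With public ops, assuming a
# variable `v`, gathered `indices`, and an incoming gradient `dresult`:
import tensorflow as tf

v = tf.Variable(tf.ones([4, 2]))
indices = tf.constant([0, 2])
dresult = tf.ones([2, 2])        # stand-in for the incoming gradient

sparse_grad = tf.IndexedSlices(
    dresult, indices,
    dense_shape=tf.raw_ops.VariableShape(input=v.handle))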
def shape(self):
  """The shape of this variable."""
  if context.in_graph_mode():
    return tensor_shape.TensorShape(self._handle.op.get_attr("shape"))
  return tensor_shape.TensorShape(
      gen_resource_variable_ops.variable_shape(self._handle).numpy())
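# --- Hedged usage sketch (illustrative) ---
# In eager mode the shape comes back from the VariableShape op as a tensor and
# is converted to a static TensorShape, e.g.:
import tensorflow as tf

v = tf.Variable(tf.zeros([2, 3]))
print(v.shape)                                            # TensorShape([2, 3])
print(tf.raw_ops.VariableShape(input=v.handle).numpy())   # array([2, 3])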