def remote_load_(t: Tensor,
                 offset: Optional[Tensor] = None,
                 remote_buffer_handle: Optional[RemoteBufferHandle] = None
                 ) -> Tensor:
    """Load a tensor from a remote (off-chip) buffer inplace.

    This op is identical to `remote_load` with the exception of how `t` is handled.
    In `remote_load` `t` is cloned and the output is written to the clone, whereas
    in this version `t` is written to directly.

    See also: `remote_buffer_handle`, `remote_store`, `remote_load`

    Args:
        t (Tensor): The tensor the loaded data will be written to.
        offset (Optional[Tensor], optional): Optional 0-rank Tensor.
            Specifies the row in the remote buffer the data will be loaded from.
            Defaults to None.
        remote_buffer_handle (Optional[RemoteBufferHandle], optional): The handle
            to the remote buffer. Defaults to None.
    Returns:
        Tensor: The tensor loaded from the remote buffer
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)
    if offset is not None:
        check_in_graph(g, offset)

    remote_buffer_handle = prepare_remote_buffer(t, remote_buffer_handle, g)

    settings = ctx._get_op_settings('remote_load_inplace')
    opid = _ir.OperatorIdentifier("ai.graphcore", "RemoteLoadInplace", 1,
                                  _ir.NumInputs(1, 2), 1)

    if offset is not None:
        op = pb_g.createConnectedOp_RemoteLoadInplaceOp(
            {
                0: t.id,
                1: offset.id
            }, {0: g._create_tensor_id("remote_load_inplace_out")}, opid,
            settings, remote_buffer_handle.remote_buffer_id)
    else:
        op = pb_g.createConnectedOp_RemoteLoadInplaceOp(
            {
                0: t.id,
            }, {0: g._create_tensor_id("remote_load_inplace_out")}, opid,
            settings, remote_buffer_handle.remote_buffer_id)

    return Tensor._from_pb_tensor(op.outTensor(0))
def ipu_copy(t: Tensor, destination: int, source: Optional[int] = None) -> Tensor: """ Copies a Tensor to a virtual graph. Args: t: Tensor Tensor to be copied. destination: int Ipu for the tensor to be copied to. source: Optional[int] Ipu for the tensor to be copied from. By default, the source will be taken from the producer of `t`. If `t` does not have a producer a source MUST be provided. Returns: t_copied: Tensor The copied tensor """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, t) if source is None: # Use internal method to infer the input tensor's virtual graph. source = t._pb_tensor.getVirtualGraphIdUnsafe() if source == -1: raise ValueError( f"Could not infer virtual graph for Tensor to be copied \"{t}\" . " "Please specify `source` when copying for this tensor.") settings = ctx._get_op_settings('ipucopy') opid = _ir.OperatorIdentifier("ai.graphcore", "IpuCopy", 1, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_IpuCopyOp( { 0: t.id, }, { 0: g._create_tensor_id(t.name + f"_c{destination}"), }, opid, source, destination, settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
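A minimal usage sketch for `ipu_copy`. It assumes the broader `popart.ir` entry points (`pir.Ir`, `Ir.main_graph`, `pir.variable` and the `ops` namespace), which are not defined in this section; the data, names and IPU ids are illustrative only.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    x = pir.variable(np.ones((2, 2), dtype=np.float32))
    # The source IPU is given explicitly here, so no producer lookup is needed.
    x_on_ipu1 = ops.ipu_copy(x, destination=1, source=0)
```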
def slice_(t: Tensor,
           start: Optional[Union[int, List[Optional[int]]]] = None,
           stop: Optional[Union[int, List[Optional[int]]]] = None,
           step: Optional[Union[int, List[Optional[int]]]] = None,
           axis: Optional[Union[int, List[int]]] = None) -> Tensor:
    """
    Selects elements from a tensor using a slice or multiple slices. Inplace.

    This is the inplace version of :func:`~ops.slice`. Behaviour is the same,
    but modifies the tensor inplace.

    Args:
        t (Tensor): Tensor to slice.
        start: Index of first element (inclusive) or `None` which defaults to 0.
        stop: Index of last element (exclusive) or `None` which defaults to the
            last element (inclusive) if the step is forward, or the first element
            (inclusive) if the step is backwards.
        step: `1` for forward or `-1` for backwards.
        axis: Axis of tensor to slice on, or `None` to default to each axis
            sequentially.
    Returns:
        Tensor: alias of the input tensor `t`.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    if start is None and stop is None and step is None:
        return t

    start, stop, step, axis = process_args(start, stop, step, axis)

    opid = _ir.OperatorIdentifier("ai.graphcore", "SliceInplace", 1,
                                  _ir.NumInputs(1, 1), 1)
    settings = ctx._get_op_settings("slice_inplace")
    op = pb_g.createConnectedOp_SliceInplaceOp(
        {0: t.id},
        {0: g._create_tensor_id("slice_out")},
        starts_=start,
        ends_=stop,
        axes_=axis,
        steps_=step,
        opid=opid,
        settings=settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
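A short sketch of the inplace slice, under the same assumed `popart.ir` entry points as the example above.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    t = pir.variable(np.arange(12, dtype=np.float32).reshape(3, 4))
    # Rows 0 and 1 of axis 0; the result aliases `t` rather than copying it.
    top_rows = ops.slice_(t, start=0, stop=2, axis=0)
```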
def subgraph_to_op_tensor(self, subgraph_tensor: Tensor) -> Tensor: """ Provided an input or output tensor in the `called_graph`, this method returns the associated input or output tensor on the CallOp. Args: subgraph_tensor (Tensor): The tensor in the subgraph. Raises: ValueError: If `subgraph_tensor` is not an input or output of the called graph. Returns: Tensor: The associated input or output tensor on the CallOp """ sgraph = self.called_graph._pb_graph if sgraph.hasInputId(subgraph_tensor.id): idx = sgraph.getInputIndex(subgraph_tensor.id) return Tensor._from_pb_tensor(self._op.inTensor(idx)) if sgraph.hasOutputId(subgraph_tensor.id): idx = sgraph.getOutputIndex(subgraph_tensor.id) return Tensor._from_pb_tensor(self._op.outTensor(idx)) raise ValueError( f"Tensor {subgraph_tensor.name} is not an Input or Output of the called graph {sgraph.id}" )
def group_norm(t: Tensor,
               weight: Tensor,
               bias: Tensor,
               num_groups: int,
               eps: float = 1e-5) -> Tensor:
    """
    Applies Group Normalisation over a tensor `t` as described in
    https://arxiv.org/abs/1803.08494

    Args:
        t: Tensor
            Tensor to be normalized.
        weight: Tensor
            Tensor to scale output of normalisation.
        bias: Tensor
            Tensor to shift output of normalisation.
        num_groups: int
            Number of groups to separate the channels into.
        eps: float
            Small value added to the variance for numerical stability.
            Defaults to 1e-5.
    Returns:
        out: Tensor
            The group normalised Tensor.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t, weight, bias)

    settings = ctx._get_op_settings('group_norm')
    opid = _ir.OperatorIdentifier("ai.graphcore", "GroupNormalization", 1,
                                  _ir.NumInputs(3, 3), 3)
    op = pb_g.createConnectedOp_GroupNormOp(
        {
            0: t.id,
            1: weight.id,
            2: bias.id
        },
        {
            0: g._create_tensor_id("group_norm_out"),
            1: g._create_tensor_id("group_norm_mean"),
            2: g._create_tensor_id("group_norm_inv_std_dev"),
        },
        opid,
        num_groups,
        eps,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
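A sketch of `group_norm` on a (batch, channels) activation, again assuming the `pir.Ir`/`pir.variable` entry points from the earlier examples; the shapes are illustrative.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    x = pir.variable(np.random.rand(8, 16).astype(np.float32))
    weight = pir.variable(np.ones(16, dtype=np.float32))   # per-channel scale
    bias = pir.variable(np.zeros(16, dtype=np.float32))    # per-channel shift
    y = ops.group_norm(x, weight, bias, num_groups=4)
```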
def random_normal(seed_tensor: Tensor, shape: Tuple[int, ...], mean: float = 0.0, std: float = 1.0, dtype: dtypes.dtype = dtypes.float32): """ Randomly sample from a normal distribution with `mean` and standard deviation `std`. Note: not compatible with `IPUModel`. Args: seed_tensor (Tensor): Used to seed the probability distribution. Must have data type uint32 and shape (2,). shape (Tuple[int, ...]): Shape of output tensor mean (float, optional): Mean of distribution. Defaults to 0.0. std (float, optional): Standard deviation of distribution. Defaults to 1.0. dtype (dtypes.dtype, optional): Data type of output tensor. Defaults to dtypes.float32. Returns: Tensor: tensor with elements sampled from a normal distribution. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, seed_tensor) settings = ctx._get_op_settings('random_normal') opid = _ir.OperatorIdentifier("ai.onnx", "RandomNormal", 1, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_RandomNormalOp( {0: seed_tensor.id}, {0: g._create_tensor_id("random_normal_out")}, shape_=shape, mean_=mean, scale_=std, dataType_=convert_optional_dtype(dtype), opid=opid, settings=settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
def random_uniform(seed_tensor: Tensor, shape: Tuple[int, ...], low: float = 0.0, high: float = 1.0, dtype: dtypes.dtype = dtypes.float32): """ Randomly sample from a uniform distribution with minimum value `low` and maximum value `high`. Note: not compatible with `IPUModel`. Args: seed_tensor (Tensor): Used to seed the probability distribution. Must have data type uint32 and shape (2,). shape (Tuple[int, ...]): Shape of output tensor low (float, optional): Minimum value. Defaults to 0.0. high (float, optional): Maximum value. Defaults to 1.0. dtype (dtypes.dtype, optional): Data type of output tensor. Defaults to dtypes.float32. Returns: Tensor: tensor with elements sampled from a uniform distribution. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, seed_tensor) settings = ctx._get_op_settings('random_uniform') opid = _ir.OperatorIdentifier("ai.onnx", "RandomUniform", 1, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_RandomUniformOp( {0: seed_tensor.id}, {0: g._create_tensor_id("random_uniform_out")}, shape_=shape, low_=low, high_=high, dataType_=convert_optional_dtype(dtype), opid=opid, settings=settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
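The two random ops share the same seeding convention; a sketch under the assumptions above, with the seed held in a uint32 variable of shape (2,) as the docstrings require.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    seed = pir.variable(np.array([0, 42], dtype=np.uint32))
    u = ops.random_uniform(seed, shape=(2, 3), low=-1.0, high=1.0)
    n = ops.random_normal(seed, shape=(2, 3), mean=0.0, std=0.5)
```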
def accumulate_(t: Tensor, X: Tensor, f: Optional[Union[float, Tensor]] = None) -> Tensor: """ Updates tensor `t` inplace using `t = t + (f * X)`. Does not apply numpy broadcasting. Uses mixed precision poplibs operations. `t` and `X` must be the same shape, but can be different types. `f` must be scalar. Args: t: Tensor Tensor to be updated. X: Tensor Value to update the variable f: Optional[Union[float, Tensor]] Optional scalar to apply to update before the addition. Returns: updated: Tensor An alias to the variable. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, t, X) ins = {0: t.id, 1: X.id} ov = handle_optimizer_value(g, f, ins, 2) settings = ctx._get_op_settings('accumulate') op = pb_g.createConnectedOp_AccumulateOp( ins, { 0: g._create_tensor_id('accumulate__' + t.name), }, _ir.AccumulationType.DampenedAdd if f is not None else _ir.AccumulationType.Add, ov, settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
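A sketch of `accumulate_` used as a gradient accumulator, with the same assumed entry points; since `f` is given, the op uses the DampenedAdd accumulation type shown in the code above.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    accum = pir.variable(np.zeros((4,), dtype=np.float32))
    grad = pir.variable(np.ones((4,), dtype=np.float32))
    # accum <- accum + 0.9 * grad, updated inplace; the result aliases `accum`.
    accum = ops.accumulate_(accum, grad, f=0.9)
```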
def print_tensor(t: Tensor,
                 title: Optional[str] = None,
                 print_self: bool = True,
                 print_gradient: bool = False) -> Tensor:
    """Print a tensor every time this op runs in the graph.

    Note that the op prints in the context in which it is placed: for example,
    if it is placed within a loop op, it will print on each loop iteration.

    Args:
        t (Tensor): The tensor to print.
        title (str, optional): Title to print. Defaults to None.
        print_self (bool, optional): Print the tensor itself. Defaults to True.
        print_gradient (bool, optional): Whether to print the associated gradient
            tensor of t. Defaults to False.

    Returns:
        Tensor: The same unaltered tensor.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    settings = ctx._get_op_settings('print_tensor')
    opid = _ir.OperatorIdentifier("ai.graphcore", "PrintTensor", 1,
                                  _ir.NumInputs(1, 1), 1)

    if title is None:
        title = f"print{t.name}"

    op = pb_g.createConnectedOp_PrintTensorOp(
        {
            0: t.id,
        },
        {
            0: g._create_tensor_id("print_out"),
        },
        opid,
        print_self,
        print_gradient,
        title,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
def scaled_add(X: Tensor, Y: Tensor, a: Union[float, Tensor] = 1.0, b: Union[float, Tensor] = 1.0) -> Tensor: """ Calculates `Z = aX + bY`. Does not apply numpy broadcasting. Uses mixed precision poplibs operations. `X` and `Y` must be the same shape, but can be different types. `a` and `b` must be scalars. Args: X, Y: Tensor a, b: Union[float, Tensor] Scalars to be applied to X/Y before addition. Returns: Z: Tensor """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, X, Y) ins = {0: X.id, 1: Y.id} if isinstance(a, Tensor): ins[2] = a.id a = 1.0 if isinstance(b, Tensor): ins[3] = b.id b = 1.0 settings = ctx._get_op_settings('scaled_add') opid = _ir.OperatorIdentifier("ai.graphcore", "ScaledAdd", 1, _ir.NumInputs(2, 4), 1) op = pb_g.createConnectedOp_ScaledAddOp( ins, { 0: g._create_tensor_id('scaled_add_out'), }, opid, a, b, settings) return Tensor._from_pb_tensor(op.outTensor(0))
def host_load(h2d_stream: HostToDeviceStream, name: Optional[str] = None) -> Tensor: """ Host Load Op: an op to represent the transfer of data from the host to the device. It uses the existing host to device transfers created when building the IR, but defers the actual poplar::Copy until the op itself runs. This allows the copy to be scheduled as part of the normal op scheduling. Args: h2d_stream: (HostToDeviceStream) Stream to load from. name (str): Name to use for the returned tensor. Returns: Tensor: The output tensor streamed from host. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph shape = h2d_stream.shape dtype = h2d_stream.dtype stream_tensor_id = h2d_stream.tensor_id() if name is None: pb_main = g.ir().main_graph()._pb_graph name = _ir.removeScope(pb_main, stream_tensor_id) init_tensor = init(shape, dtype, name + '_init') name_hostload = g._create_tensor_id(name + '_hostload') opid = _ir.OperatorIdentifier("ai.graphcore", "HostLoad", 1, _ir.NumInputs(1), 1) pb_g.createConnectedOp_HostLoadOp( {0: init_tensor.id}, {0: name_hostload}, opid, ctx._get_op_settings('host_load'), stream_tensor_id, ) return Tensor._from_pb_tensor(pb_g.getTensor(name_hostload))
def io_tile_copy(t: Tensor) -> Tensor: """ Copies a Tensor to/from io tiles on the current virtual_graph. Args: t: Tensor Tensor to be copied. Returns: t_copied: Tensor The copied tensor """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, t) settings = ctx._get_op_settings('iotilecopy') # Use internal method to infer the input tensor's tileSet vgid, tile_set = t._pb_tensor.getVirtualGraphIdAndTileSetUnsafe() if vgid != -1: settings.vgraphId = _ir.OptionalVGraphId(vgid) if tile_set != _ir.TileSet.Undefined: # TileSet should match the destination # so it should be the opposite of the source `t`. settings.tileSet = _ir.TileSet.IO if tile_set == _ir.TileSet.Compute else _ir.TileSet.Compute opid = _ir.OperatorIdentifier("ai.graphcore", "IoTileCopy", 1, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_IoTileCopyOp( { 0: t.id, }, { 0: g._create_tensor_id(t.name + f"_iotilecopy"), }, opid, settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
def scaled_add_(X: Tensor, Y: Tensor, a: Union[float, Tensor] = 1.0, b: Union[float, Tensor] = 1.0) -> Tensor: """ Calculates `X = aX + bY`. Inplace on X. Does not apply numpy broadcasting. Uses mixed precision poplibs operations. `X` and `Y` must be the same shape, but can be different types. Args: X, Y: Tensor a, b: Union[float, Tensor] Scalars to be applied to X/Y before addition. Returns: X: Tensor Updated `X` tensor. Alias of X. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, X, Y) ins = {0: X.id, 1: Y.id} if isinstance(a, Tensor): ins[2] = a.id a = 1.0 if isinstance(b, Tensor): ins[3] = b.id b = 1.0 settings = ctx._get_op_settings('scaled_add') op = pb_g.createConnectedOp_ScaledAddLhsInplaceOp( ins, { 0: g._create_tensor_id('scaled_add__' + X.name), }, a, b, settings) return Tensor._from_pb_tensor(op.outTensor(0))
def h2d_stream(shape: Iterable[int], dtype: dtype,
               name: Optional[str] = None) -> HostToDeviceStream:
    """Create a host-to-device stream in the main graph.

    The returned stream can be passed to `host_load` to stream data from the
    host into the graph. Can only be called from within the main graph's
    context.

    Args:
        shape (Iterable[int]): Shape of the tensor to be streamed.
        dtype (dtype): Data type of the tensor to be streamed.
        name (Optional[str]): Name for the stream tensor.
            Defaults to "h2d_stream".

    Returns:
        HostToDeviceStream: A handle to the created stream.
    """
    g = gcg()
    mg = g.ir().main_graph()
    if g.name != mg.name:
        raise ValueError(
            "popart.ir: Can only call `h2d_stream` in context of main graph. You are in context of graph:",
            g.name)
    pb_mg = mg._pb_graph

    if name is None:
        name = "h2d_stream"
    name = mg._create_tensor_id(name)

    pb_mg.addStream(name, _ir.TensorInfo(dtype._pb_dtype, list(shape)), name)

    return HostToDeviceStream._from_tensor(
        Tensor._from_pb_tensor(pb_mg.getTensor(name)))
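A sketch combining `h2d_stream` with `host_load` above. It assumes `h2d_stream` and the dtype constants are re-exported at the package level (`pir.h2d_stream`, `pir.float32`), which is not shown in this section.

```python
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    # Declare the stream in the main graph, then load from it inside the graph.
    x_stream = pir.h2d_stream((2, 4), pir.float32, name="x_stream")
    x = ops.host_load(x_stream, name="x")
```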
def dropout(t: Tensor, seed_tensor: Tensor, p: float) -> Tensor:
    """
    Randomly zeroes elements of `t` with a probability of `p`. The dropout mask
    is created using samples from a Bernoulli distribution seeded with the
    `seed_tensor`.

    The user needs to manage updating the `seed_tensor` for each forward pass
    and replica.

    Args:
        t (Tensor): Tensor to apply dropout to.
        seed_tensor (Tensor): Used to seed the probability distribution which
            generates the dropout mask. Must have data type uint32 and
            shape (2,).
        p (float): Probability that an element will be zeroed.
    Returns:
        Tensor: The tensor with dropout applied.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t, seed_tensor)

    settings = ctx._get_op_settings('dropout')
    opid = _ir.OperatorIdentifier("ai.onnx", "Dropout", 10,
                                  _ir.NumInputs(1, 1), 1)
    op = pb_g.createConnectedOp_DropoutOp(
        {
            0: t.id,
            1: seed_tensor.id
        },
        {0: g._create_tensor_id("dropout_out")},
        ratio_=p,
        opid=opid,
        settings=settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
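A dropout sketch under the same assumptions; the seed is a uint32, shape (2,) variable that the user is expected to update between forward passes and across replicas.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    x = pir.variable(np.random.rand(4, 4).astype(np.float32))
    seed = pir.variable(np.array([0, 7], dtype=np.uint32))
    y = ops.dropout(x, seed, p=0.1)  # zero each element with probability 0.1
```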
def op_in_to_subgraph_in_tensor(self, parent_tensor: Tensor) -> Tensor: """ Provided an input tensor on the CallOp, this method returns the associated input tensor in the `called_graph`. Args: parent_tensor (Tensor): The tensor from the parent graph. Raises: popart_error: If `parent_tensor` is not an input to the CallOp. Returns: Tensor: The tensor in the `called_graph`. """ pb_subgraph = self.called_graph._pb_graph # Throws if not an input op_in_idx = self._op.firstInIndex(parent_tensor._pb_tensor) pb_sub_tensor = pb_subgraph.getInputTensor( self._op.opInToSubgraphInIndex(op_in_idx)) return Tensor._from_pb_tensor(pb_sub_tensor)
def reshape_(t: Tensor, shape: Tuple[int, ...]) -> Tensor: """ Reshape a Tensor inplace. This is the inplace version of :func:`~ops.reshape` Args: t: Tensor Tensor to be reshaped. shape: tuple of ints Tuple containing the shape of the output. Returns: out: Tensor An alias of the input tensor, reshaped. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, t) if any(map(lambda axis: axis < -1, shape)) or shape.count(0) > 0: raise ValueError( f"Invalid shape value in reshape. Must be '>0' or '-1'. Provided {shape}." ) if shape.count(-1) > 1: raise ValueError( f"Reshape shape can contain at most one '-1' value. Provided {shape}." ) settings = ctx._get_op_settings('reshape_inplace') opid = _ir.OperatorIdentifier("ai.graphcore", "ReshapeInplace", 1, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_ReshapeInplaceOp( {0: t.id}, {0: g._create_tensor_id(f"{t.name}_reshaped")}, opid, reshape_handle_negative_axis(t, shape), settings) return Tensor._from_pb_tensor(op.outTensor(0))
def transpose_(t: Tensor,
               permutation: Optional[Tuple[int, ...]] = None) -> Tensor:
    """
    Permute the axes of a Tensor. By default reverses the axes of t.

    This is the inplace version of :func:`~ops.transpose`. Behaviour is the same,
    but modifies the tensor inplace.

    Args:
        t: Tensor
            Tensor to be transposed.
        permutation: tuple of ints (optional)
            Tuple containing a permutation of [0, N-1] where N is the rank of
            input `t`. If not provided, the axes will be reversed.
    Returns:
        out: Tensor
            The transposed tensor
    """
    permutation = _handle_permuation(t, permutation)

    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    settings = ctx._get_op_settings('transpose_inplace')
    opid = _ir.OperatorIdentifier("ai.graphcore", "TransposeInplace", 1,
                                  _ir.NumInputs(1, 1), 1)
    op = pb_g.createConnectedOp_TransposeInplaceOp(
        {0: t.id},
        {0: g._create_tensor_id(f"{t.name}_T")},
        opid,
        permutation,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
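A sketch chaining the inplace `reshape_` and `transpose_` ops, with the same assumed entry points; both results alias the original tensor.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    t = pir.variable(np.arange(6, dtype=np.float32).reshape(2, 3))
    t2 = ops.reshape_(t, (3, 2))     # alias of t, viewed as (3, 2)
    t3 = ops.transpose_(t2, (1, 0))  # alias of t with the axes of t2 permuted
```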
def negate(t: Tensor) -> Tensor: """ Negates tensor `t` Args: t: Tensor Input tensor. Returns: out: Tensor Output tensor. """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, t) settings = ctx._get_op_settings('negate') opid = _ir.OperatorIdentifier("ai.onnx", "Neg", 6, _ir.NumInputs(1, 1), 1) op = pb_g.createConnectedOp_NegateOp( {0: t.id}, {0: g._create_tensor_id(f"negate_out")}, opid, settings) return Tensor._from_pb_tensor(op.outTensor(0))
def increment_mod(t: Tensor, increment: float, modulus: float) -> Tensor:
    """
    Compute `(t + increment) % modulus`.

    Args:
        t: Tensor
            Tensor to increment (modulo).
        increment: float
            How much to increment the input tensor by.
        modulus: float
            The modulo operand.
    Returns:
        out: Tensor
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)

    settings = ctx._get_op_settings('increment_mod')
    opid = _ir.OperatorIdentifier("ai.graphcore", "IncrementMod", 1,
                                  _ir.NumInputs(1, 1), 1)
    op = pb_g.createConnectedOp_IncrementModOp(
        {
            0: t.id,
        },
        {
            0: g._create_tensor_id("increment_mod_out"),
        },
        opid,
        increment,
        modulus,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
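A sketch of `increment_mod` used as a wrapping counter, under the assumptions above.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    step = pir.variable(np.array(9, dtype=np.float32))
    # (9 + 1) % 10 == 0
    next_step = ops.increment_mod(step, increment=1.0, modulus=10.0)
```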
def div(lhs: Tensor, rhs: Tensor) -> Tensor:
    """
    Divides two Tensors element-wise. Follows numpy broadcasting rules.
    Arguments must have the same dtype. The output will be the same dtype as
    the inputs. With integer values floor division is used.

    Args:
        lhs: Tensor
            Dividend (the tensor to be divided).
        rhs: Tensor
            Divisor (the tensor to divide by).
    Returns:
        out: Tensor
            The division of lhs by rhs.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, lhs, rhs)

    settings = ctx._get_op_settings('div')
    opid = _ir.OperatorIdentifier("ai.onnx", "Div", 7, _ir.NumInputs(2, 2), 1)
    op = pb_g.createConnectedOp_DivOp(
        {
            0: lhs.id,
            1: rhs.id
        },
        {
            0: g._create_tensor_id("div_out"),
        },
        opid,
        settings,
    )

    return Tensor._from_pb_tensor(op.outTensor(0))
def logical_and(lhs: Tensor, rhs: Tensor) -> Tensor: """ Compares two Tensors element-wise with an AND operator. Follows numpy broadcasting rules. Inputs will be cast to bool if needed. Args: lhs, rhs: Tensor Tensors to be compared. Returns: out: Tensor The value (lhs AND rhs) """ ctx = get_current_context() g = ctx.graph pb_g = g._pb_graph check_in_graph(g, lhs, rhs) lhs = cast_if_needed(lhs, dtypes.bool) rhs = cast_if_needed(rhs, dtypes.bool) settings = ctx._get_op_settings('and') opid = _ir.OperatorIdentifier("ai.onnx", "And", 7, _ir.NumInputs(2, 2), 1) op = pb_g.createConnectedOp_AndOp( { 0: lhs.id, 1: rhs.id }, { 0: g._create_tensor_id("and_out"), }, opid, settings, ) return Tensor._from_pb_tensor(op.outTensor(0))
def create_adamupdater(acc_first_order: Tensor,
                       acc_second_order: Tensor,
                       ins,
                       mode,
                       weight: Optional[Tensor] = None,
                       time_step: Optional[Tensor] = None,
                       weight_decay: Optional[Union[float, Tensor]] = None,
                       beta1: Optional[Union[float, Tensor]] = None,
                       beta2: Optional[Union[float, Tensor]] = None,
                       epsilon: Union[float, Tensor] = 1e-07) -> Tensor:
    """Create an AdamUpdaterOp in the current graph and return its updater output tensor."""
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, acc_first_order, acc_second_order)
    if weight is not None:
        check_in_graph(g, weight)
    if time_step is not None:
        check_in_graph(g, time_step)
    if weight_decay is not None and isinstance(weight_decay, Tensor):
        # weight_decay may be a float; only check it against the graph when it
        # is a Tensor.
        check_in_graph(g, weight_decay)

    outs = {
        0: g._create_tensor_id('Updater'),
    }
    wd = handle_optimizer_value(g, weight_decay, ins, 4)
    b1 = handle_optimizer_value(g, beta1, ins, 5)
    b2 = handle_optimizer_value(g, beta2, ins, 6)
    eps = handle_optimizer_value(g, epsilon, ins, 7)

    settings = ctx._get_op_settings('adamupdater')
    op = pb_g.createConnectedOp_AdamUpdaterOp(ins, outs, mode, wd, b1, b2, eps,
                                              settings)

    return Tensor._from_pb_tensor(op.outTensor(0))
def scatter(t: Tensor,
            indices: Tensor,
            values: Tensor,
            axis: int = 0,
            available_memory_proportion: Optional[float] = None) -> Tensor:
    """
    Selects multiple elements of a tensor, given by `indices`, and updates them
    with `values`.

    Scatter takes three inputs data, indices, and values of the same rank r >= 1
    and an optional attribute axis that identifies an axis of data (by default,
    the outer-most axis, that is axis 0). The output of the operation is produced
    by creating a copy of the input data and then updating its values at the
    index positions specified by indices. Its output shape is the same as the
    shape of data.

    For each entry in values, the target index in data is obtained by combining
    the corresponding entry in indices with the index of the entry itself: the
    index-value for dimension = axis is obtained from the value of the
    corresponding entry in indices and the index-value for dimension != axis is
    obtained from the index of the entry itself.

    Pseudo example:
    ```
    x1 = scatter(x, [1, 2, 3], [-1, -2, -3])
    x2 = x.copy()
    x2[1] = -1
    x2[2] = -2
    x2[3] = -3
    x1 == x2
    ```

    Args:
        t: Tensor
            Input tensor
        indices: Tensor
            The indices of the elements to update
        values: Tensor
            The values to update the tensor with
        axis: int
            Which axis to set on. Default is 0.
        available_memory_proportion: Optional[float]
            The maximum proportion of available memory on each tile that this
            layer should consume temporarily during the course of the operation.
            Defaults to 1.0 if not set globally.

    Returns:
        scatter: Tensor
            The tensor with updated values.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t)
    check_in_graph(g, indices)
    check_in_graph(g, values)

    available_memory_proportion = convert_optional_float(
        available_memory_proportion)

    opid = _ir.OperatorIdentifier("ai.onnx", "Scatter", 11,
                                  _ir.NumInputs(3, 3), 1)

    settings = ctx._get_op_settings("scatter")

    op = pb_g.createConnectedOp_ScatterOp(
        {
            0: t.id,
            1: indices.id,
            2: values.id
        },
        {0: g._create_tensor_id("scatter_out")},
        axis_=axis,
        opid=opid,
        available_memory_proportion_=available_memory_proportion,
        settings=settings)

    return Tensor._from_pb_tensor(op.outTensor(0))
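A scatter sketch matching the pseudo example above, with the usual assumed entry points plus `pir.constant` for the index and value tensors.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    data = pir.variable(np.zeros((3, 3), dtype=np.float32))
    indices = pir.constant(np.array([[0, 1, 2]], dtype=np.int32))
    values = pir.constant(np.array([[1.0, 2.0, 3.0]], dtype=np.float32))
    # With axis=0, out[indices[0, j], j] = values[0, j]; `data` itself is not modified.
    out = ops.scatter(data, indices, values, axis=0)
```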
def get_tensors(self) -> Tuple[Tensor, ...]: """Return all Tensors in the Graph""" return tuple( Tensor._from_pb_tensor(t) for t in self._pb_graph.getTensors())
def get_tensor(self, tensor_id: str) -> Tensor:
    """Return the Tensor in the Graph with id `tensor_id`."""
    return Tensor._from_pb_tensor(self._pb_graph.getTensor(tensor_id))
def matmul(lhs: Tensor,
           rhs: Tensor,
           available_memory_proportion: Optional[float] = None,
           serialise_mode: SerialiseMode = SerialiseMode.NoSerialisation,
           serialise_factor: int = 1,
           output_type: Optional[dtypes.dtype] = None,
           partials_type: Optional[dtypes.dtype] = None) -> Tensor:
    """Matrix multiplies two Tensors.

    Follows numpy matrix multiplication rules for N-D tensors, see
    https://numpy.org/doc/stable/reference/generated/numpy.matmul.html

    Arguments must have the same dtype. Shapes must be compatible as per numpy
    matrix multiplication rules.

    Args:
        lhs, rhs (Tensor): Tensors to be matrix multiplied.
        available_memory_proportion (Optional[float]): The maximum proportion of
            available memory on each tile that this layer should consume
            temporarily during the course of the operation. Defaults to 1.0.
        serialise_mode (SerialiseMode, optional): The serialisation mode to use
            (NoSerialisation, ReducingDim, InputChannels, OutputChannels).
            Defaults to SerialiseMode.NoSerialisation.
        serialise_factor (int, optional): The factor to serialise by.
            Defaults to 1.
        output_type (Optional[dtypes.dtype], optional): Output datatype to
            enforce. Defaults to the dtype of lhs/rhs.
        partials_type (Optional[dtypes.dtype], optional): The type to use for
            partial results (float16, float32). Defaults to the session option
            `partialsTypeMatMuls`, or float32 if that option is not set.
    Returns:
        Tensor: The matrix product of lhs and rhs.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, lhs, rhs)

    settings = ctx._get_op_settings('matmul')
    opid = _ir.OperatorIdentifier("ai.onnx", "MatMul", 9, _ir.NumInputs(2, 2),
                                  1)

    if partials_type is None:
        partials_type = g.ir()._pb_ir.getSessionOptions().partialsTypeMatMuls
        if partials_type == "":
            partials_type = _ir.op.MatMulPartialsType.FLOAT
    else:
        partials_type = _convert_partials_type(partials_type)

    # These two args can be None, in which case we want to send a no-opt optional.
    out_dtype = _ir.OptionalDataType(
        output_type._pb_dtype) if output_type else _ir.OptionalDataType()
    optional_memory_proportion = convert_optional_float(
        available_memory_proportion)

    serialise_settings = _convert_serialisation_settings(
        serialise_mode, serialise_factor)
    op = pb_g.createConnectedOp_MatMulOp({
        0: lhs.id,
        1: rhs.id
    }, {
        0: g._create_tensor_id("matmul_out"),
    }, opid, settings, optional_memory_proportion, serialise_settings,
                                         out_dtype, partials_type)

    return Tensor._from_pb_tensor(op.outTensor(0))
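A matmul sketch with float16 inputs and float32 partials, under the same assumed entry points; the dtype constant (`pir.float32`) is assumed to be the `dtypes` value the signature expects.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    a = pir.variable(np.random.rand(4, 8).astype(np.float16))
    b = pir.variable(np.random.rand(8, 2).astype(np.float16))
    # Keep accumulation in float32 even though the inputs are float16.
    c = ops.matmul(a, b, partials_type=pir.float32)
```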
def get_op_input_tensor(self, op_in_idx: int) -> Tensor:
    """Return the parent-graph tensor connected at input index `op_in_idx` of the CallOp."""
    pb_op_in_tensor = self._op.inTensor(op_in_idx)
    return Tensor._from_pb_tensor(pb_op_in_tensor)
def get_op_output_tensor(self, op_out_idx: int) -> Tensor:
    """Return the parent-graph tensor connected at output index `op_out_idx` of the CallOp."""
    pb_op_out_tensor = self._op.outTensor(op_out_idx)
    return Tensor._from_pb_tensor(pb_op_out_tensor)
def dynamic_update_(t: Tensor, index: Tensor, t_update: Tensor,
                    axes: Iterable[int], sizes: Iterable[int],
                    no_overlap: bool) -> Tensor:
    """
    Dynamically updates tensor `t` inplace.

    The word "dynamic" refers to the fact that the index can be specified at
    runtime.

    `index`, `axes` and `sizes` determine the slice of `t` which will be
    updated. The dimensions of this slice and `t_update` must match. A slice
    along an axis can be defined by the tuple (start, stop, step), where:

    - start is equal to the index for the respective axis
    - stop is equal to index + size for the respective axis
    - step equals 1

    Limitations:
    Assuming we would like to update `t` with dimensions (4, 3), the slicing of
    `t` will have the following limitations:

    - A step other than 1 is not supported (i.e. t[::2, :] is not supported)
    - Negative slicing is not supported (i.e. t[:-1, :] is not supported)
    - A stop larger than the size of the axis is not supported
      (i.e. t[:5, :] is not supported)

    Args:
        t: Tensor
            Tensor to update.
        index: Tensor
            The indices to start the slice from.
        t_update: Tensor
            The tensor to update t with.
        axes: List[int]
            The axes of t to make the update at.
        sizes: List[int]
            The sizes of the updates along the specified axes.
            For example: if index = [1, 2], axes = [0, 3] and sizes = [2, 4],
            the tensor will be updated at t[1:3, :, :, 2:6].
        no_overlap: bool
            If set to true, then correct gradient backpropagation is only
            guaranteed if each region in the output tensor has exactly one
            populator (operation that writes data to this region). There are no
            run-time or compile-time checks possible to ensure this.
    Returns:
        out: Tensor
            The updated tensor.
    """
    ctx = get_current_context()
    g = ctx.graph
    pb_g = g._pb_graph

    check_in_graph(g, t, index)

    settings = ctx._get_op_settings('dynamicupdate_inplace')
    # This ensures that `t` is created by calling
    # `popops::createSliceableTensorFromSlice` with `t_update`.
    # Does the user need control over this?
    settings.inferTensorMappingToFrom = {0: 2}

    opid = _ir.OperatorIdentifier("ai.graphcore", "DynamicUpdateInplace", 1,
                                  _ir.NumInputs(3, 3), 1)
    op = pb_g.createConnectedOp_DynamicUpdateInplaceOp(
        {
            0: t.id,
            1: index.id,
            2: t_update.id
        }, {0: g._create_tensor_id("dynamicupdateinplace_out")}, opid, axes,
        sizes, no_overlap, settings, t_update._pb_tensor.info)

    return Tensor._from_pb_tensor(op.outTensor(0))
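A sketch of `dynamic_update_` overwriting one row of a (4, 3) tensor; entry points as in the earlier examples, and the uint32 dtype for `index` is an assumption rather than something this section specifies.

```python
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops

ir = pir.Ir()
main = ir.main_graph()
with main:
    t = pir.variable(np.zeros((4, 3), dtype=np.float32))
    t_update = pir.variable(np.ones((1, 3), dtype=np.float32))
    index = pir.variable(np.array([2], dtype=np.uint32))  # row chosen at runtime
    # Overwrites t[2:3, :] with t_update, inplace.
    t = ops.dynamic_update_(t, index, t_update, axes=[0], sizes=[1],
                            no_overlap=True)
```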