def bprop(x, z, out, dout):
    if mean_flag:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                # accumulate dout into z, then all-reduce the accumulator
                z = F.depend(z, F.assign_add(z, dout))
                real_grad = all_reduce(z)
                dx = real_grad
            else:
                dx = dout
            # scale by 1/dev_num so the all-reduced sum becomes a mean
            float_one = F.scalar_cast(1.0, F.dtype(dx))
            num = F.scalar_cast(dev_num, F.dtype(dx))
            dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
        else:
            dx = zeros_like(x)  # grad accumulation does not support RowTensor yet
    else:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                z = F.depend(z, F.assign_add(z, dout))
                real_grad = all_reduce(z)
                dx = real_grad
            else:
                dx = dout
        else:
            dx = zeros_like(x)  # grad accumulation does not support RowTensor yet
    return (dx, zeros_like(z))
def bprop(x, out, dout):
    if mean_flag:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            dx = all_reduce(dout)
            float_one = F.scalar_cast(1.0, F.dtype(dx))
            num = F.scalar_cast(dev_num, F.dtype(dx))
            dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
        else:
            # sparse gradient: gather indices/values across devices, then scale
            indices = all_gather(dout.indices)
            grad = all_gather(dout.values)
            float_one = F.scalar_cast(1.0, F.dtype(grad))
            num = F.scalar_cast(dev_num, F.dtype(grad))
            grad = mul(grad, cast(F.scalar_to_array(float_one / num), F.dtype(grad)))
            dx = RowTensor(indices, grad, dout.dense_shape)
    else:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            dx = all_reduce(dout)
        else:
            indices = all_gather(dout.indices)
            grad = all_gather(dout.values)
            dx = RowTensor(indices, grad, dout.dense_shape)
    return (dx,)
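# NOTE: a minimal standalone sketch (plain NumPy, illustrative names only) of
# what the mean_flag branch above computes: a SUM all-reduce over dev_num
# devices followed by a 1/dev_num scale is equivalent to averaging the
# per-device gradients.
import numpy as np

dev_num = 4
per_device_grads = [np.full((2, 2), float(i)) for i in range(dev_num)]
reduced = np.sum(per_device_grads, axis=0)   # what all_reduce(dout) yields under SUM
dx = reduced * (1.0 / dev_num)               # the mean_flag scaling
assert np.allclose(dx, np.mean(per_device_grads, axis=0))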
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        dx = all_reduce_grad(dout)
    else:
        indices = all_gather(dout.indices)
        grad = all_gather(dout.values)
        dx = RowTensor(indices, grad, dout.dense_shape)
    return (dx,)
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        dx = all_reduce_grad(dout)
    else:
        # sparse gradient passed as a (indices, values, dense_shape) tuple
        indices = all_gather(dout[0])
        grad = all_gather(dout[1])
        dx = (indices, grad, dout[2])
    return (dx,)
def concatenate(arrays, axis=0):
    """
    Join a sequence of arrays along an existing axis.

    Args:
        arrays (Union[Tensor, tuple(Tensor), list(Tensor)]): a Tensor or a list
            of Tensors to be concatenated.
        axis (int, optional): The axis along which the arrays will be joined.
            If axis is None, arrays are flattened before use. Default is 0.

    Returns:
        Tensor, a Tensor concatenated from a Tensor or a list of Tensors.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> x1 = np.ones((1,2,3))
        >>> x2 = np.ones((1,2,1))
        >>> x = np.concatenate((x1, x2), axis=-1)
        >>> print(x.shape)
        (1, 2, 4)
    """
    array_type = F.typeof(arrays)
    if _check_is_tensor(array_type):
        # if a single tensor is provided, it is treated as a tuple of tensors
        # along the first dimension. For example, a tensor of shape (3, 4, 5)
        # is treated as: tuple(tensor_1(4, 5), tensor_2(4, 5), tensor_3(4, 5))
        if axis is None:
            return ravel(arrays)
        arr_shape = F.shape(arrays)
        _check_axes_range((axis,), len(arr_shape))
        # move axis 0 to the designated position, while keeping the other
        # axes' relative positions unchanged
        new_axes, new_shape = _move_axes_for_concatenate(arr_shape, axis)
        arrays = transpose(arrays, new_axes)
        arrays = reshape(arrays, new_shape)
        return arrays

    flattened_arrays = ()
    if axis is None:
        for arr in arrays:
            flattened_arrays += (ravel(arr),)
        axis = -1
        return P.Concat(axis)(flattened_arrays)

    arr_shape = F.shape(arrays[0])
    _check_axes_range((axis,), len(arr_shape))

    # if only one tensor is in the tuple/list, return the tensor itself
    if len(arrays) == 1:
        return arrays[0]
    return P.Concat(axis)(arrays)
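# NOTE: a short usage sketch for concatenate above; mindspore.numpy mirrors
# the NumPy API here, so axis=None flattens every input before joining.
import mindspore.numpy as mnp

a = mnp.ones((2, 3))
b = mnp.zeros((2, 3))
print(mnp.concatenate((a, b), axis=0).shape)     # (4, 3)
print(mnp.concatenate((a, b), axis=None).shape)  # (12,) -- inputs are flattened first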
def bprop(x, y, z, out, dout):
    # mirror only on the final micro-step of gradient accumulation
    do_mirror = equal(y, grad_accumulation_step)
    do_mirror = reshape(do_mirror, (()))
    if mean_flag:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                tmp = z + dout
                real_grad = all_reduce(tmp)
                dx = real_grad - z
            else:
                dx = dout
            float_one = F.scalar_cast(1.0, F.dtype(dx))
            num = F.scalar_cast(dev_num, F.dtype(dx))
            dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
        else:
            if do_mirror:
                indices = all_gather(dout.indices)
                grad = all_gather(dout.values)
            else:
                indices = dout.indices
                grad = dout.values
            float_one = F.scalar_cast(1.0, F.dtype(grad))
            num = F.scalar_cast(dev_num, F.dtype(grad))
            grad = mul(grad, cast(F.scalar_to_array(float_one / num), F.dtype(grad)))
            dx = RowTensor(indices, grad, dout.dense_shape)
    else:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                tmp = z + dout
                real_grad = all_reduce(tmp)
                dx = real_grad - z
            else:
                dx = dout
        else:
            if do_mirror:
                indices = all_gather(dout.indices)
                grad = all_gather(dout.values)
            else:
                indices = dout.indices
                grad = dout.values
            dx = RowTensor(indices, grad, dout.dense_shape)
    return (dx, zeros_like(y), zeros_like(z))
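# NOTE: a hedged NumPy sketch (illustrative values only) of the do_mirror
# arithmetic above, assuming a SUM all-reduce across two devices:
# dx = all_reduce(z + dout) - z, i.e. the globally reduced sum of
# (accumulator + current grad) minus the device's own local accumulator.
import numpy as np

z0, z1 = np.array([1.0]), np.array([2.0])   # per-device accumulators z
d0, d1 = np.array([0.5]), np.array([0.5])   # per-device incoming dout

reduced = (z0 + d0) + (z1 + d1)             # all_reduce(z + dout) under SUM
dx0 = reduced - z0                          # dx seen on device 0 -> [3.0]
dx1 = reduced - z1                          # dx seen on device 1 -> [2.0]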
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        # bool/int gradients are not divisible; pass them through unchanged
        if F.issubclass_(F.dtype(dout), mstype.bool_) or F.issubclass_(F.dtype(dout), mstype.int32) \
                or F.issubclass_(F.dtype(dout), mstype.int16):
            return (dout,)
        dx = op(dout, cast(F.scalar_to_array(divisor), dtype(dout)))
        return (dx,)

    if F.issubclass_(F.typeof(dout), mstype.tuple_):
        dx = ()
        input_nums = F.tuple_len(dout)
        for i in range(input_nums):
            ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
            dx = dx + (ele_grad,)
        return (dx,)

    dx = []
    input_nums = F.list_len(dout)
    for i in range(input_nums):
        ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
        dx.append(ele_grad)
    return (dx,)
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        dx = all_reduce_grad(dout)
        # mask the gradient to the positions where the local input won the reduce
        z = equal(x, out)
        z = cast(z, dtype(dx))
        dx = mul(dx, z)
    else:
        indices = all_gather(dout.indices)
        grad = all_gather(dout.values)
        z = equal(x, out)
        z = cast(z, dtype(grad))
        grad = mul(grad, z)
        dx = RowTensor(indices, grad, dout.dense_shape)
    return (dx,)
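# NOTE: a hedged NumPy sketch of the equal(x, out) masking above, assuming the
# gradient all-reduce is a SUM. For a MAX (or MIN) all-reduce, out holds the
# elementwise winner across devices, so multiplying by equal(x, out) routes the
# gradient only to the device(s) whose local x produced it. Values below are
# illustrative only.
import numpy as np

x_dev0 = np.array([1.0, 5.0])
x_dev1 = np.array([3.0, 2.0])
out = np.maximum(x_dev0, x_dev1)       # all-reduce MAX result, identical on both devices
dout = np.array([10.0, 10.0])          # upstream gradient
reduced = dout + dout                  # all_reduce_grad(dout) under SUM -> [20., 20.]
dx_dev0 = reduced * (x_dev0 == out)    # [ 0., 20.]
dx_dev1 = reduced * (x_dev1 == out)    # [20.,  0.]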
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        dx = all_reduce_grad(dout)
        z = equal(x, out)
        z = cast(z, dtype(dx))
        dx = mul(dx, z)
    else:
        # sparse gradient passed as a (indices, values, dense_shape) tuple
        indices = all_gather(dout[0])
        grad = all_gather(dout[1])
        z = equal(x, out)
        z = cast(z, dtype(grad))
        grad = mul(grad, z)
        dx = (indices, grad, dout[2])
    return (dx,)