def bprop(x, indices, axis, out, dout):
    x_shp = shape_op(x)
    if axis == 0:
        indices_size = (size_op(indices),)
        x_tail_shp = x_shp[1:]
        values_shape = indices_size + x_tail_shp
        values = reshape(dout, values_shape)
        indices = reshape(indices, indices_size)
        return RowTensor(indices, values, x_shp), zeros_like(indices), zeros_like(axis)
    if F.rank(dout) == 0:
        dout = P.ExpandDims()(dout, -1)
    if F.rank(indices) == 0:
        indices = P.ExpandDims()(indices, -1)
    out_shp = shape_op(dout)
    ind_shp = shape_op(indices)
    # Example: out_shape:(3,2,3) axis 1 -> (1,0,2)
    perm_1 = _generate_shape_index(out_shp, ind_shp, axis)
    values_transpose = transpose(dout, perm_1)
    params_grad = unsorted_segment_sum(values_transpose, indices, shape_op(x)[axis])
    # Example: out_shape:(3,2,3) axis 2 -> (1,2,0)
    perm_2 = _generate_inverse_index(x_shp, axis)
    params_grad = transpose(params_grad, perm_2)
    return params_grad, zeros_like(indices), zeros_like(axis)
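# A minimal NumPy sketch of the dense branch above (axis != 0), restricted to
# 1-D indices for brevity. np_unsorted_segment_sum and np_gather_grad are
# hand-rolled stand-ins, not the MindSpore implementation; they only
# illustrate the transpose / scatter-add / transpose-back pattern.
import numpy as np

def np_unsorted_segment_sum(data, segment_ids, num_segments):
    # Scatter-add rows of `data` into num_segments buckets keyed by segment_ids;
    # duplicated ids accumulate, which is exactly what the gather gradient needs.
    out = np.zeros((num_segments,) + data.shape[1:], dtype=data.dtype)
    np.add.at(out, segment_ids, data)
    return out

def np_gather_grad(x, indices, axis, dout):
    # Move the gathered axis to the front, scatter-add into the parameter's
    # rows, then transpose the result back into the parameter's layout.
    perm_1 = (axis,) + tuple(i for i in range(dout.ndim) if i != axis)
    values_transpose = np.transpose(dout, perm_1)
    params_grad = np_unsorted_segment_sum(values_transpose, indices, x.shape[axis])
    perm_2 = tuple(range(1, axis + 1)) + (0,) + tuple(range(axis + 1, x.ndim))
    return np.transpose(params_grad, perm_2)

x = np.zeros((3, 4, 5), dtype=np.float32)
indices = np.array([1, 1, 3])
dout = np.ones((3, 3, 5), dtype=np.float32)  # gradient of np.take(x, indices, axis=1)
grad = np_gather_grad(x, indices, 1, dout)
assert grad.shape == x.shape  # the duplicated index 1 accumulates: grad[:, 1, :] == 2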
def construct(self, input_indices, input_values, field_ids):
    _check_input_2d(F.shape(input_indices), "input_indices", self.cls_name)
    _check_input_2d(F.shape(input_values), "input_values", self.cls_name)
    _check_input_2d(F.shape(field_ids), "field_ids", self.cls_name)
    _check_input_dtype(F.dtype(input_indices), "input_indices", [mstype.int32, mstype.int64], self.cls_name)
    _check_input_dtype(F.dtype(input_values), "input_values", [mstype.float32], self.cls_name)
    _check_input_dtype(F.dtype(field_ids), "field_ids", [mstype.int32], self.cls_name)

    batch_size = self.shape(input_indices)[0]
    num_segments = batch_size * self.field_size
    bias = Range(0, num_segments, self.field_size)()
    bias = self.reshape(bias, (batch_size, -1))
    field_ids = self.bias_add(field_ids, bias)

    if self.target == "CPU":
        out = self.embeddinglookup(self.embedding_table, input_indices, 0)
    else:
        if self.forward_unique:
            shp = self.shape(input_indices) + (self.embedding_size,)
            indices_flatten = self.reshape(input_indices, (-1,))
            unique_id, unique_idx = self.unique(indices_flatten)
            weight_unique = self.gatherv2(self.embedding_table, unique_id, 0)
            weight_flatten = self.gather_revert(weight_unique, unique_idx, 0)
            out = self.reshape(weight_flatten, shp)
        else:
            out = self.gatherv2(self.embedding_table, input_indices, 0)
    if self.max_norm is not None:
        axis = _make_axis_range(F.rank(input_indices), F.rank(out))
        clip_by_norm = ClipByNorm(axis)
        out = clip_by_norm(out, self.max_norm)

    weights = self.reshape(input_values, (batch_size, self.shape(input_indices)[1], 1))
    embedding = self.mul(weights, out)

    if self.operator == 'MAX':
        # Fill padded positions with -inf so they do not influence the max result
        negative_inf_mask = self.cast(self.equal(weights, 0), mstype.float32)
        inf_mask = self.inf_mask_mul(negative_inf_mask, self.negative_inf_value)
        embedding = self.inf_add(embedding, inf_mask)

    embedding = self.reshape(embedding, (-1, self.embedding_size))
    field_ids = self.reshape(field_ids, (-1,))

    merged_vectors = self.merge_op(embedding, field_ids, num_segments)

    if self.operator == 'MAX':
        value_count = self.count_op(self.abs(self.reshape(input_values, (-1,))), field_ids, num_segments)
        value_zeros = self.cast(self.max_no_equal(value_count, 0.0), mstype.float32)
        count = self.expand(value_zeros, -1)
        merged_vectors = self.max_mask_mul(merged_vectors, count)

    if self.operator == 'MEAN':
        value_count = self.count_op(self.abs(input_values), field_ids, num_segments)
        value_count = self.expand(value_count, -1)
        merged_vectors = self.div_no_nan(merged_vectors, value_count)

    merged_vectors = self.reshape(merged_vectors, (batch_size, self.field_size, -1))
    return merged_vectors
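# A hedged NumPy sketch of the segment-id bias trick above: offsetting each
# row's field_ids by row_index * field_size makes segment ids unique across
# the batch, so one unsorted segment reduction pools per (row, field) pair.
# All names below are NumPy stand-ins for illustration, not the MindSpore ops.
import numpy as np

batch_size, field_size, embedding_size = 2, 3, 4
field_ids = np.array([[0, 0, 2], [1, 2, 2]], dtype=np.int32)     # (batch, num_features)
bias = np.arange(0, batch_size * field_size, field_size).reshape(batch_size, 1)
global_ids = field_ids + bias                                    # row 0 -> ids 0..2, row 1 -> ids 3..5

embedding = np.ones((batch_size * field_ids.shape[1], embedding_size), dtype=np.float32)
num_segments = batch_size * field_size
merged = np.zeros((num_segments, embedding_size), dtype=np.float32)
np.add.at(merged, global_ids.reshape(-1), embedding)             # plays the role of a 'SUM' merge_op
merged = merged.reshape(batch_size, field_size, embedding_size)  # matches the final reshape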
def construct(self, indices):
    if self.target == "CPU":
        out = self.embeddinglookup(self.embedding_table, indices, 0)
    else:
        out = self.gatherv2(self.embedding_table, indices, 0)
    if self.max_norm is not None:
        axis = _make_axis_range(F.rank(indices), F.rank(out))
        clip_by_norm = ClipByNorm(axis)
        out = clip_by_norm(out, self.max_norm)
    return out
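# A small NumPy sketch of the max_norm clip at the end of construct, assuming
# _make_axis_range selects the trailing embedding axes so each looked-up
# vector is clipped independently. Illustrative semantics only, not ClipByNorm.
import numpy as np

def clip_rows_by_norm(out, max_norm):
    # Scale any row whose L2 norm exceeds max_norm back onto the norm ball;
    # rows already inside the ball are left unchanged.
    norms = np.linalg.norm(out, axis=-1, keepdims=True)
    scale = np.minimum(1.0, max_norm / np.maximum(norms, 1e-12))
    return out * scale

out = np.array([[3.0, 4.0], [0.3, 0.4]])   # row norms 5.0 and 0.5
clipped = clip_rows_by_norm(out, 1.0)      # first row rescaled to norm 1.0, second untouched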
def construct(self, indices):
    if self.target == "CPU":
        out = self.embeddinglookup(self.embedding_table, indices, 0)
    else:
        if self.forward_unique:
            shp = self.shape(indices) + (self.embedding_size,)
            indices_flatten = self.reshape_first(indices, (-1,))
            unique_id, unique_idx = self.unique(indices_flatten)
            weight_unique = self.gatherv2(self.embedding_table, unique_id, 0)
            weight_flatten = self.gather_revert(weight_unique, unique_idx, 0)
            out = self.reshape(weight_flatten, shp)
        else:
            out = self.gatherv2(self.embedding_table, indices, 0)
    if self.max_norm is not None:
        axis = _make_axis_range(F.rank(indices), F.rank(out))
        clip_by_norm = ClipByNorm(axis)
        out = clip_by_norm(out, self.max_norm)
    return out
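# Hedged NumPy sketch of the forward_unique path above: deduplicate the
# flattened indices, gather each distinct row once, then index back through
# the inverse mapping. The result equals a direct gather, but each embedding
# row is fetched a single time. Stand-in code, not the MindSpore operators.
import numpy as np

table = np.arange(20, dtype=np.float32).reshape(5, 4)          # (vocab, embedding_size)
indices = np.array([[1, 3], [3, 1]])

flat = indices.reshape(-1)
unique_id, unique_idx = np.unique(flat, return_inverse=True)   # like self.unique
weight_unique = table[unique_id]                               # gather distinct rows only
weight_flatten = weight_unique[unique_idx]                     # like gather_revert
out = weight_flatten.reshape(indices.shape + (table.shape[1],))

assert np.array_equal(out, table[indices])                     # matches the direct gather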
def matmul(x1, x2, dtype=None):
    """
    Returns the matrix product of two arrays.

    Note:
        Numpy arguments `out`, `casting`, `order`, `subok`, `signature`, and `extobj` are
        not supported.
        On GPU, the supported dtypes are np.float16 and np.float32.
        On CPU, the supported dtypes are np.float16 and np.float32.

    Args:
        x1 (Tensor): Input tensor, scalar not allowed.
        x2 (Tensor): Input tensor, scalar not allowed.
        dtype (:class:`mindspore.dtype`, optional): defaults to None. Overrides the dtype of the
            output Tensor.

    Returns:
        Tensor or scalar, the matrix product of the inputs. This is a scalar only
        when both `x1`, `x2` are 1-d vectors.

    Raises:
        ValueError: If the last dimension of `x1` is not the same size as the
            second-to-last dimension of `x2`, or if a scalar value is passed in.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> x1 = Tensor(np.arange(2*3*4).reshape(2, 3, 4), mindspore.float32)
        >>> x2 = Tensor(np.arange(4*5).reshape(4, 5), mindspore.float32)
        >>> output = ops.matmul(x1, x2)
        >>> print(output)
        [[[  70.   76.   82.   88.   94.]
          [ 190.  212.  234.  256.  278.]
          [ 310.  348.  386.  424.  462.]]
         [[ 430.  484.  538.  592.  646.]
          [ 550.  620.  690.  760.  830.]
          [ 670.  756.  842.  928. 1014.]]]
    """
    # performs type promotion
    dtype1 = F.dtype(x1)
    dtype2 = F.dtype(x2)
    if not _check_same_type(dtype1, dtype2):
        x1 = x1.astype(mstype.float32)
        x2 = x2.astype(mstype.float32)

    ndim1_orig, ndim2_orig = F.rank(x1), F.rank(x2)
    shape1_orig, shape2_orig = F.shape(x1), F.shape(x2)
    transpose_b = ndim2_orig == 1
    shape_backbone = _check_matmul_shapes(shape1_orig, shape2_orig)
    # infers the shape of the output
    shape_out = shape_backbone + _infer_shape_rem(shape1_orig, shape2_orig,
                                                  ndim1_orig, ndim2_orig, transpose_b)

    x1 = _expand(x1, 2)
    x2 = _expand(x2, 2)
    if F.rank(x2) == 2:
        if F.rank(x1) > 2:
            x1 = F.reshape(x1, (-1, shape1_orig[-1]))
        res = P.MatMul(False, transpose_b)(x1, x2)
    else:
        # broadcasts x1.shape[:-2] with x2.shape[:-2]
        ndim_aligned = _max(ndim1_orig, ndim2_orig)
        x1 = _expand(x1, ndim_aligned)
        x2 = _expand(x2, ndim_aligned)
        shape1_aligned, shape2_aligned = F.shape(x1), F.shape(x2)
        x1 = _broadcast_to(x1, shape1_aligned[:-2], shape_backbone, ndim_aligned)
        x2 = _broadcast_to(x2, shape2_aligned[:-2], shape_backbone, ndim_aligned)
        res = P.BatchMatMul(False, transpose_b)(x1, x2)

    if dtype is not None:
        res = res.astype(dtype)
    return F.reshape(res, shape_out)
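# A brief NumPy check of the broadcasting rule the function implements: the
# batch "backbone" dimensions (everything but the last two) broadcast against
# each other, while the trailing matrix dimensions must agree. This assumes
# NumPy's matmul shares these semantics, which it does for batched inputs.
import numpy as np

x1 = np.ones((2, 1, 3, 4))   # batch backbone (2, 1)
x2 = np.ones((5, 4, 6))      # batch backbone (5,)
res = np.matmul(x1, x2)      # backbones broadcast to (2, 5)
assert res.shape == (2, 5, 3, 6)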
def _expand(x, ndim):
    """Expand `x` to `ndim` by repeatedly inserting a leading dimension of size 1."""
    while F.rank(x) < ndim:
        x = F.expand_dims(x, 0)
    return x
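# For instance, a rank-1 input run through _expand(x, 3) gains two leading
# size-1 axes. NumPy analogue of the same loop, for illustration only:
import numpy as np

x = np.ones((4,))
while x.ndim < 3:
    x = np.expand_dims(x, 0)
assert x.shape == (1, 1, 4)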