def __init__(self, probs=None, seed=None, dtype=mstype.int32, name="Categorical"):
    param = dict(locals())
    param['param_dict'] = {'probs': probs}
    valid_dtype = mstype.int_type
    Validator.check_type("Categorical", dtype, valid_dtype)
    super(Categorical, self).__init__(seed, dtype, name, param)

    self._probs = self._add_parameter(probs, 'probs')
    if self.probs is not None:
        check_rank(self.probs)
        check_prob(self.probs)
        check_sum_equal_one(self.probs)

        # update is_scalar_batch and broadcast_shape
        # drop one dimension
        if self.probs.shape[:-1] == ():
            self._is_scalar_batch = True
        self._broadcast_shape = self._broadcast_shape[:-1]

    self.argmax = P.Argmax()
    self.broadcast = broadcast_to
    self.cast = P.Cast()
    self.clip_by_value = C.clip_by_value
    self.concat = P.Concat(-1)
    self.cumsum = P.CumSum()
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expand_dim = P.ExpandDims()
    self.fill = P.Fill()
    self.floor = P.Floor()
    self.gather = P.GatherNd()
    self.less = P.Less()
    self.log = log_generic
    self.log_softmax = P.LogSoftmax()
    self.logicor = P.LogicalOr()
    self.multinomial = P.Multinomial(seed=self.seed)
    self.reshape = P.Reshape()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select = P.Select()
    self.shape = P.Shape()
    self.softmax = P.Softmax()
    self.squeeze = P.Squeeze()
    self.squeeze_first_axis = P.Squeeze(0)
    self.squeeze_last_axis = P.Squeeze(-1)
    self.square = P.Square()
    self.transpose = P.Transpose()
    self.index_type = mstype.int32
def construct(self, *args):
    weights = self.weights
    loss, heatmaps_loss, pafs_loss = self.network(*args)
    sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
    # grads = self.grad(self.network, weights)(*args, sens)
    grads = self.grad(self.depend_network, weights)(*args, sens)
    if self.reducer_flag:
        grads = self.grad_reducer(grads)
    # return F.depend(loss, self.optimizer(grads))
    # for grad in grads:
    #     self.print(grad)
    loss = F.depend(loss, self.optimizer(grads))
    return loss, heatmaps_loss, pafs_loss
def construct(self, x):
    if not self.training:
        return x

    if self.is_gpu:
        out, _ = self.dropout(x)
        return out

    shape = self.get_shape(x)
    dtype = P.DType()(x)
    keep_prob = self.cast(self.keep_prob, dtype)
    output = self.dropout_gen_mask(shape, keep_prob)
    return self.dropout_do_mask(x, output, keep_prob)
def __init__(self, power=0, name='PowerTransform'):
    param = dict(locals())
    super(PowerTransform, self).__init__(name=name, param=param)
    validator.check_value_type('power', power, [int, float], self.name)
    validator.check_number("power", power, 0, Rel.GE, self.name)
    self._power = power

    self.pow = P.Pow()
    self.dtypeop = P.DType()
    self.cast = P.Cast()
    self.exp = exp_generic
    self.expm1 = expm1_generic
    self.log = log_generic
    self.log1p = log1p_generic
def _attn(self, query, key, value, attention_mask):
    """
    Get the weighted score along the seq_length.

    Inputs:
        query: the query matrix
        key: the key matrix
        value: the value matrix
        attention_mask: the attention mask matrix with shape
            (batch_size, 1, seq_length, seq_length)

    Returns:
        weighted_values: Tensor, the weighted sum scores
    """
    if not self.scale:
        query = query / F.cast(self.coeff, F.dtype(query))
        key = key / F.cast(self.coeff, F.dtype(key))

    score = self.batch_matmul(query, key)
    if self.scale:
        score = score / P.Cast()(self.scale_factor, P.DType()(score))

    ori_dtype = P.DType()(score)
    score = P.Cast()(score, mstype.float32)
    multiply_out = P.Sub()(P.Cast()(F.tuple_to_array((1.0,)), P.DType()(score)),
                           P.Cast()(attention_mask, P.DType()(score)))
    adder = P.Mul()(multiply_out, self.multiply_data)
    attention_scores = adder + score

    attention_scores = P.Cast()(attention_scores, ori_dtype)
    shape = F.shape(attention_scores)
    attention_probs = nn.Softmax()(F.reshape(attention_scores, (-1, shape[-1])))
    attention_probs = F.reshape(attention_probs, shape)

    attention_probs = self.prob_dropout(attention_probs)
    weighted_values = self.batch_matmul(attention_probs, value)
    return weighted_values
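# Note (added for clarity; not part of the original source): in the _attn method
# above, multiply_out is (1 - attention_mask), and self.multiply_data is a large
# negative constant, so `adder` pushes the scores of masked positions toward a
# large negative value. After the softmax those positions therefore receive
# (near-)zero attention probability.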
def construct(self, *inputs):
    weights = self.weights
    loss = self.network(*inputs)
    sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
    grads = self.grad(self.network, weights)(*inputs, sens)
    grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
    if self.reducer_flag:
        # apply grad reducer on grads
        grads = self.grad_reducer(grads)
    return F.depend(loss, self.optimizer(grads))
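# Note (added for clarity; not part of the original source): `sens` in the
# construct above is the initial sensitivity (output gradient) fed into the
# backward pass. P.Fill() builds a tensor with the same dtype and shape as
# `loss`, filled with the scalar self.sens, so the gradients returned by
# self.grad are effectively scaled by self.sens (commonly 1.0, or the
# loss-scale value in mixed-precision training).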
def exp_generic(input_x):
    """
    Exp op on Ascend doesn't support int types.
    Fix this by casting the type.
    """
    exp = P.Exp()
    cast = P.Cast()
    dtype = P.DType()
    checktype = P.IsSubClass()

    if not checktype(dtype(input_x), mstype.float_):
        input_x = cast(input_x, mstype.float32)
    return exp(input_x)
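# A minimal usage sketch (added for illustration; not part of the original source).
# It assumes MindSpore runs in PyNative mode so the ops execute eagerly.
def _example_exp_generic():
    import mindspore as ms
    from mindspore import Tensor

    x_int = Tensor([0, 1, 2], ms.int32)
    # exp_generic casts the int32 input to float32 before applying Exp,
    # avoiding the unsupported-int-dtype failure a bare P.Exp() would hit on Ascend.
    return exp_generic(x_int)  # float32 Tensor, approximately [1.0, 2.718, 7.389]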
def construct(self, *args):
    weights = self.weights
    loss = self.network(*args)
    sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
    grads = self.grad(self.network, weights)(*args, sens)
    if self.reducer_flag:
        # apply grad reducer on grads
        grads = self.grad_reducer(grads)
    if self.use_global_norm:
        grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
        grads = C.clip_by_global_norm(grads)
    return F.depend(loss, self.optimizer(grads))
def __init__(self):
    super(ClipByNorm, self).__init__()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select_ = P.Select()
    self.greater_ = P.Greater()
    self.cast = P.Cast()
    self.sqrt = P.Sqrt()
    self.max_op = P.Maximum()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.fill = P.Fill()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()
def __init__(self,
             batch_size,
             from_seq_length,
             to_seq_length,
             num_attention_heads=1,
             size_per_head=512,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             do_return_2d_tensor=False,
             use_relative_positions=False,
             dtype=mstype.float32,
             compute_type=mstype.float32):
    super(BertAttentionRelativePositionValues, self).__init__()
    self.batch_size = batch_size
    self.from_seq_length = from_seq_length
    self.to_seq_length = to_seq_length
    self.use_relative_positions = use_relative_positions
    self.size_per_head = size_per_head
    self.num_attention_heads = num_attention_heads
    self.trans_shape_position = (1, 2, 0, 3)
    self.trans_shape_relative = (2, 0, 1, 3)
    self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=dtype)
    self.trans_shape = (0, 2, 1, 3)

    self.reshape = P.Reshape()
    self.multiply = P.Mul()
    self.transpose = P.Transpose()
    self.batch_num = batch_size * num_attention_heads
    self.matmul = P.BatchMatMul()
    self.do_return_2d_tensor = do_return_2d_tensor
    if self.do_return_2d_tensor:
        self.shp_return = (batch_size * from_seq_length, num_attention_heads * size_per_head)
    else:
        self.shp_return = (batch_size, from_seq_length, num_attention_heads * size_per_head)
    self.cast_compute_type = SaturateCast(dst_type=compute_type)
    self._generate_relative_positions_embeddings = \
        RelaPosEmbeddingsGenerator(length=self.to_seq_length,
                                   depth=self.size_per_head,
                                   max_relative_position=16,
                                   initializer_range=initializer_range,
                                   use_one_hot_embeddings=use_one_hot_embeddings)
    self.fill = P.Fill()
    self.multiply = P.Mul()
    self.type = P.DType()
    self.cast = P.Cast()
def __init__(self):
    super(CrossEntropyWithIgnoreIndex, self).__init__()
    self.onehot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.cast = P.Cast()
    self.ce = nn.SoftmaxCrossEntropyWithLogits()
    self.greater = P.Greater()
    self.maximum = P.Maximum()
    self.fill = P.Fill()
    self.sum = P.ReduceSum(keep_dims=False)
    self.dtype = P.DType()
    self.relu = P.ReLU()
    self.reshape = P.Reshape()
def construct(self, grid, prediction, pred_xy, pred_wh, y_true, gt_box):
    object_mask = y_true[:, :, :, :, 4:5]
    class_probs = y_true[:, :, :, :, 5:]

    grid_shape = P.Shape()(prediction)[1:3]
    grid_shape = P.Cast()(F.tuple_to_array(grid_shape[::-1]), ms.float32)

    pred_boxes = self.concat((pred_xy, pred_wh))
    true_xy = y_true[:, :, :, :, :2] * grid_shape - grid
    true_wh = y_true[:, :, :, :, 2:4]
    true_wh = P.Select()(P.Equal()(true_wh, 0.0),
                         P.Fill()(P.DType()(true_wh), P.Shape()(true_wh), 1.0),
                         true_wh)
    true_wh = P.Log()(true_wh / self.anchors * self.input_shape)
    box_loss_scale = 2 - y_true[:, :, :, :, 2:3] * y_true[:, :, :, :, 3:4]

    gt_shape = P.Shape()(gt_box)
    gt_box = P.Reshape()(gt_box, (gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]))

    iou = self.iou(P.ExpandDims()(pred_boxes, -2), gt_box)  # [batch, grid[0], grid[1], num_anchor, num_gt]
    best_iou = self.reduce_max(iou, -1)  # [batch, grid[0], grid[1], num_anchor]

    ignore_mask = best_iou < self.ignore_threshold
    ignore_mask = P.Cast()(ignore_mask, ms.float32)
    ignore_mask = P.ExpandDims()(ignore_mask, -1)
    ignore_mask = F.stop_gradient(ignore_mask)

    xy_loss = object_mask * box_loss_scale * self.cross_entropy(prediction[:, :, :, :, :2], true_xy)
    wh_loss = object_mask * box_loss_scale * 0.5 * P.Square()(true_wh - prediction[:, :, :, :, 2:4])
    confidence_loss = self.cross_entropy(prediction[:, :, :, :, 4:5], object_mask)
    confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask
    class_loss = object_mask * self.cross_entropy(prediction[:, :, :, :, 5:], class_probs)

    # Get smooth loss
    xy_loss = self.reduce_sum(xy_loss, ())
    wh_loss = self.reduce_sum(wh_loss, ())
    confidence_loss = self.reduce_sum(confidence_loss, ())
    class_loss = self.reduce_sum(class_loss, ())

    loss = xy_loss + wh_loss + confidence_loss + class_loss
    return loss / P.Shape()(prediction)[0]
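# Note (added for clarity; not in the original source): the P.Select() above swaps
# zero-width/height ground-truth boxes with 1.0 before the P.Log() call, so log(0)
# never produces -inf for grid cells that contain no object (those cells are masked
# out by object_mask anyway). F.stop_gradient on ignore_mask keeps the IOU-based
# mask out of the backward graph, avoiding costly Maximum/Minimum gradient ops.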
def __init__(self, power=0., name='PowerTransform'):
    param = dict(locals())
    param['param_dict'] = {'power': power}
    super(PowerTransform, self).__init__(name=name, param=param)
    self._power = self._add_parameter(power, 'power')
    check_greater_equal_zero(self._power, 'Power')

    self.pow = P.Pow()
    self.dtypeop = P.DType()
    self.cast = P.Cast()
    self.exp = exp_generic
    self.expm1 = P.Expm1()
    self.log = log_generic
    self.log1p = P.Log1p()
def __init__(self, num_sampled, num_classes, num_true=1,
             sampled_values=None, remove_accidental_hits=True, seed=0,
             reduction='none'):
    super(SampledSoftmaxLoss, self).__init__(reduction)

    if num_true < 1:
        raise ValueError(f"num_true {num_true} is less than 1.")
    if seed < 0:
        raise ValueError(f"seed {seed} is less than 0.")
    if num_sampled > num_classes:
        raise ValueError(f"num_sampled {num_sampled} is greater than num_classes {num_classes}.")
    if num_true > num_classes:
        raise ValueError(f"num_true {num_true} is greater than num_classes {num_classes}.")
    if sampled_values is not None:
        if not isinstance(sampled_values, (list, tuple)):
            raise TypeError(f"sampled_values {sampled_values} is not a list or tuple.")
        if len(sampled_values) != 3:
            raise ValueError(f"sampled_values size {len(sampled_values)} is not 3.")

    self.num_sampled = num_sampled
    self.num_classes = num_classes
    self.num_true = num_true
    self.sampled_values = sampled_values
    self.remove_accidental_hits = remove_accidental_hits
    self.seed = seed
    self.sampler = P.UniformCandidateSampler(
        num_true,
        num_sampled,
        True,
        num_classes,
        seed,
        remove_accidental_hits)
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.exp = P.Exp()
    self.log = P.Log()
    self.slice_op = P.Slice()
    self.matmul = P.MatMul(False, True)
    self.gather_v2 = P.Gather()
    self.reduce_max_true = P.ReduceMax(True)
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_true = P.ReduceSum(True)
    self.concat_dim0 = P.Concat(0)
    self.concat_dim1 = P.Concat(1)
    self.ones_like = P.OnesLike()
    self.zeros_like = P.ZerosLike()
    self.mul = P.Mul()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()
def construct(self, grid, prediction, pred_xy, pred_wh, y_true, gt_box, input_shape):
    # prediction : origin output from yolo
    # pred_xy: (sigmoid(xy)+grid)/grid_size
    # pred_wh: (exp(wh)*anchors)/input_shape
    # y_true : after normalize
    # gt_box: [batch, maxboxes, xyhw] after normalize
    object_mask = y_true[:, :, :, :, 4:5]
    class_probs = y_true[:, :, :, :, 5:]

    grid_shape = P.Shape()(prediction)[1:3]
    grid_shape = P.Cast()(F.tuple_to_array(grid_shape[::-1]), ms.float32)

    pred_boxes = self.concat((pred_xy, pred_wh))
    true_xy = y_true[:, :, :, :, :2] * grid_shape - grid
    true_wh = y_true[:, :, :, :, 2:4]
    true_wh = P.Select()(P.Equal()(true_wh, 0.0),
                         P.Fill()(P.DType()(true_wh), P.Shape()(true_wh), 1.0),
                         true_wh)
    true_wh = P.Log()(true_wh / self.anchors * input_shape)

    # 2 - w*h: use a smaller scale for large boxes, since small objects need more precise boxes
    box_loss_scale = 2 - y_true[:, :, :, :, 2:3] * y_true[:, :, :, :, 3:4]

    gt_shape = P.Shape()(gt_box)
    gt_box = P.Reshape()(gt_box, (gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]))

    # add one more dimension for broadcast
    iou = self.iou(P.ExpandDims()(pred_boxes, -2), gt_box)  # gt_box is x,y,h,w after normalize
    # [batch, grid[0], grid[1], num_anchor, num_gt]
    best_iou = self.reduce_max(iou, -1)  # [batch, grid[0], grid[1], num_anchor]

    # ignore_mask: IOU too small
    ignore_mask = best_iou < self.ignore_threshold
    ignore_mask = P.Cast()(ignore_mask, ms.float32)
    ignore_mask = P.ExpandDims()(ignore_mask, -1)
    # backprop through ignore_mask would cause a lot of MaximumGrad and MinimumGrad
    # time consumption, so we turn off its gradient
    ignore_mask = F.stop_gradient(ignore_mask)

    xy_loss = self.xy_loss(object_mask, box_loss_scale, prediction[:, :, :, :, :2], true_xy)
    wh_loss = self.wh_loss(object_mask, box_loss_scale, prediction[:, :, :, :, 2:4], true_wh)
    confidence_loss = self.confidenceLoss(object_mask, prediction[:, :, :, :, 4:5], ignore_mask)
    class_loss = self.classLoss(object_mask, prediction[:, :, :, :, 5:], class_probs)
    loss = xy_loss + wh_loss + confidence_loss + class_loss
    batch_size = P.Shape()(prediction)[0]
    return loss / batch_size
def __init__(self,
             low=None,
             high=None,
             seed=None,
             dtype=mstype.float32,
             name="Uniform"):
    """
    Constructor of Uniform distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.float_type
    check_type(dtype, valid_dtype, type(self).__name__)
    super(Uniform, self).__init__(seed, dtype, name, param)
    self.parameter_type = set_param_type({'low': low, 'high': high}, self.dtype)
    if low is not None and high is not None:
        self._low = cast_to_tensor(low, self.parameter_type)
        self._high = cast_to_tensor(high, self.parameter_type)
        check_greater(self.low, self.high, "low value", "high value")
    else:
        self._low = low if low is None else cast_to_tensor(low, self.parameter_type)
        self._high = high if high is None else cast_to_tensor(high, self.parameter_type)

    self.default_parameters = [self.low, self.high]
    self.parameter_names = ['low', 'high']

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.less = P.Less()
    self.lessequal = P.LessEqual()
    self.logicaland = P.LogicalAnd()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
    self.uniform = C.uniform
    self.sametypeshape = P.SameTypeShape()
def __init__(self):
    super(NpuFloatNet, self).__init__()
    self.mul = P.Mul()
    self.alloc_status = P.NPUAllocFloatStatus()
    self.get_status = P.NPUGetFloatStatus()
    self.clear_status = P.NPUClearFloatStatus()
    self.fill = P.Fill()
    self.shape_op = P.Shape()
    self.select = P.Select()
    self.less = P.Less()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.sub = P.Sub()
    self.neg = P.Neg()
def __init__(self):
    super(ClipByNorm, self).__init__()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select_ = P.Select()
    self.greater_ = P.Greater()
    self.axis = ()
    self.cast = P.Cast()
    self.zero = Tensor(np.array([0.0]).astype(np.float32))
    self.sqrt = P.Sqrt()
    self.max_op = P.Maximum()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.fill = P.Fill()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()
def construct(self, data, label):
    weights = self.weights
    loss = self.network(data, label)
    sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
    grads = self.grad(self.network, weights)(data, label, sens)
    norm = self.hyper_map(F.partial(compute_norm), grads)
    norm = self.concat(norm)
    norm = self.norm(norm)
    cond = self.greater(norm, self.cast(self.ten, self.dtype(norm)))
    clip_val = self.select(cond, norm, self.cast(self.ten, self.dtype(norm)))
    grads = self.hyper_map(F.partial(grad_div, clip_val), grads)
    if self.reducer_flag:
        # apply grad reducer on grads
        grads = self.grad_reducer(grads)
    return F.depend(loss, self.optimizer(grads))
def __init__(self, has_attention_mask=False, dtype=mstype.float32):
    super(BertAttentionMask, self).__init__()
    self.has_attention_mask = has_attention_mask
    self.multiply_data = Tensor([-1000.0], dtype=dtype)
    self.multiply = P.Mul()

    if self.has_attention_mask:
        self.expand_dims = P.ExpandDims()
        self.sub = P.Sub()
        self.add = P.TensorAdd()
        self.cast = P.Cast()
        self.get_dtype = P.DType()
def _IgammaSeries(ax, x, a, enabled):
    """Helper function for computing Igamma using a power series."""
    logicaland = P.LogicalAnd()
    greater = P.Greater()
    fill = P.Fill()
    shape = P.Shape()
    dtype = P.DType()
    select = P.Select()

    if dtype(ax) == mstype.float16:
        epsilon = eps_fp16
    else:
        epsilon = eps_fp32

    def cond(vals):
        enabled = vals[0]
        return enabled

    def body(vals):
        enabled = vals[0]
        r = vals[1]
        c = vals[2]
        ans = vals[3]
        x = vals[4]
        dc_da = vals[5]
        dans_da = vals[6]

        r = r + 1
        dc_da = dc_da * (x / r) + (-1 * c * x) / (r * r)
        dans_da = dans_da + dc_da
        c = c * (x / r)
        ans = ans + c
        conditional = logicaland(enabled, greater(c / ans, epsilon))

        return (conditional,
                select(enabled, r, vals[1]),
                select(enabled, c, vals[2]),
                select(enabled, ans, vals[3]),
                select(enabled, x, vals[4]),
                select(enabled, dc_da, vals[5]),
                select(enabled, dans_da, vals[6]))

    ones = fill(dtype(a), shape(a), 1)
    zeros = fill(dtype(a), shape(a), 0)
    vals = (enabled, a, ones, ones, x, zeros, zeros)

    vals = _while_helper_func(cond, body, vals)
    ans = vals[3]
    return (ans * ax) / a
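# Note (added for clarity; not part of the original source): assuming the caller
# passes ax = x^a * exp(-x) / Gamma(a), the loop above evaluates the power series
# for the regularized lower incomplete gamma function,
#     igamma(a, x) = x^a * exp(-x) / Gamma(a) * sum_{k>=0} x^k / (a * (a+1) * ... * (a+k)),
# where `c` holds the current series term, `ans` the running sum, and
# `dc_da` / `dans_da` track their derivatives with respect to `a`. Iteration stops
# (per element, via the `enabled` mask) once c/ans drops below the dtype-dependent
# epsilon.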
def check_tensor_type(name, inputs, valid_type):
    """
    Check if inputs is a proper Tensor with a valid dtype.

    Args:
        name: inputs name.
        inputs: Tensor to be checked.
        valid_type: collection of accepted mindspore dtypes.

    Raises:
        TypeError: if inputs is not a Tensor, or its dtype is not in valid_type.
    """
    if not isinstance(inputs, Tensor):
        raise TypeError(f"{name} should be a Tensor")
    input_type = P.DType()(inputs)
    if input_type not in valid_type:
        raise TypeError(f"{name} dtype is invalid")
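# A minimal usage sketch (added for illustration; not part of the original source).
def _example_check_tensor_type():
    import numpy as np
    from mindspore import Tensor
    from mindspore import dtype as mstype

    probs = Tensor(np.array([0.1, 0.9], dtype=np.float32))
    # Passes: the input is a Tensor and float32 is in the accepted dtype list.
    check_tensor_type("probs", probs, [mstype.float32])
    # Would raise TypeError: a plain Python list is not a Tensor.
    # check_tensor_type("probs", [0.1, 0.9], [mstype.float32])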
def __init__(self,
             loc=None,
             scale=None,
             seed=None,
             dtype=mstype.float32,
             name="Logistic"):
    """
    Constructor of Logistic.
    """
    param = dict(locals())
    param['param_dict'] = {'loc': loc, 'scale': scale}
    valid_dtype = mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Logistic, self).__init__(seed, dtype, name, param)

    self._loc = self._add_parameter(loc, 'loc')
    self._scale = self._add_parameter(scale, 'scale')
    if self._scale is not None:
        check_greater_zero(self._scale, "scale")

    # ops needed for the class
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.consttensor = P.ScalarToTensor()
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expm1 = P.Expm1()
    self.fill = P.Fill()
    self.less = P.Less()
    self.log = log_generic
    self.log1p = P.Log1p()
    self.logicalor = P.LogicalOr()
    self.erf = P.Erf()
    self.greater = P.Greater()
    self.sigmoid = P.Sigmoid()
    self.squeeze = P.Squeeze(0)
    self.select = P.Select()
    self.shape = P.Shape()
    self.softplus = self._softplus
    self.sqrt = P.Sqrt()
    self.uniform = C.uniform

    self.threshold = np.log(np.finfo(np.float32).eps) + 1.
    self.tiny = np.finfo(np.float64).tiny
    self.sd_const = np.pi / np.sqrt(3)
def exp_by_step(input_x):
    """
    Exp op on Ascend doesn't support int types.
    Fix this by casting the type.
    """
    exp = P.Exp()
    cast = P.Cast()
    dtype = P.DType()
    checktype = P.IsSubClass()

    if checktype(dtype(input_x), mstype.int_):
        input_x = cast(input_x, mstype.float32)
    elif checktype(dtype(input_x), mstype.float_):
        pass
    else:
        return None
    return exp(input_x)
def __init__(self,
             concentration=None,
             rate=None,
             seed=None,
             dtype=mstype.float32,
             name="Gamma"):
    """
    Constructor of Gamma.
    """
    param = dict(locals())
    param['param_dict'] = {'concentration': concentration, 'rate': rate}
    valid_dtype = mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)

    # As some operators can't accept scalar input, check the type here
    if isinstance(concentration, (int, float)):
        raise TypeError("Input concentration can't be scalar")
    if isinstance(rate, (int, float)):
        raise TypeError("Input rate can't be scalar")

    super(Gamma, self).__init__(seed, dtype, name, param)

    self._concentration = self._add_parameter(concentration, 'concentration')
    self._rate = self._add_parameter(rate, 'rate')
    if self._concentration is not None:
        check_greater_zero(self._concentration, "concentration")
    if self._rate is not None:
        check_greater_zero(self._rate, "rate")

    # ops needed for the class
    self.log = log_generic
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.select = P.Select()
    self.greater = P.Greater()
    self.lgamma = nn.LGamma()
    self.digamma = nn.DiGamma()
    self.igamma = nn.IGamma()
def __init__(self,
             is_constant_jacobian=False,
             is_injective=True,
             name=None,
             dtype=None,
             param=None):
    """
    Constructor of Bijector class.
    """
    super(Bijector, self).__init__()
    validator.check_value_type('name', name, [str], type(self).__name__)
    validator.check_value_type('is_constant_jacobian', is_constant_jacobian, [bool], name)
    validator.check_value_type('is_injective', is_injective, [bool], name)
    if dtype is not None:
        validator.check_type_name("dtype", dtype, mstype.float_type, type(self).__name__)
    self._name = name
    self._dtype = dtype
    self._parameters = {}

    # parsing parameters
    for k in param.keys():
        if k == 'param':
            continue
        if not (k == 'self' or k.startswith('_')):
            self._parameters[k] = param[k]

    # if no bijector is used as an argument during initialization
    if 'bijector' not in param.keys():
        self._batch_shape = self._calc_batch_shape()
        self._is_scalar_batch = self._check_is_scalar_batch()

    self._is_constant_jacobian = is_constant_jacobian
    self._is_injective = is_injective

    self.context_mode = context.get_context('mode')
    self.checktensor = CheckTensor()

    # ops needed for the base class
    self.cast_base = P.Cast()
    self.dtype_base = P.DType()
    self.shape_base = P.Shape()
    self.fill_base = P.Fill()
    self.sametypeshape_base = P.SameTypeShape()
    self.issubclass_base = P.IsSubClass()
def get_bprop_matrix_diag_part(self):
    """Generate bprop for MatrixDiagPart"""
    get_dtype = P.DType()

    def bprop(x, y, out, dout):
        x_shape = F.shape(x)[-2:]
        if x_shape[0] == x_shape[1]:
            shape = F.shape(dout)
            dtype = get_dtype(dout)
            assist = _get_matrix_diag_assist(shape, dtype)
            return inner.MatrixDiag()(dout, assist), zeros_like(y)
        shape = F.shape(x)
        dtype = get_dtype(x)
        assist = _get_matrix_diag_part_assist(shape, dtype)
        return inner.MatrixSetDiag()(zeros_like(x), dout, assist), zeros_like(y)

    return bprop
def construct(self, u_id, pos_item_id, neg_item_id, pos_users, pos_items,
              u_group_nodes, u_neighs, u_gnew_neighs,
              i_group_nodes, i_neighs, i_gnew_neighs,
              neg_group_nodes, neg_neighs, neg_gnew_neighs):
    """Grad process"""
    weights = self.weights
    loss = self.network(u_id, pos_item_id, neg_item_id, pos_users, pos_items,
                        u_group_nodes, u_neighs, u_gnew_neighs,
                        i_group_nodes, i_neighs, i_gnew_neighs,
                        neg_group_nodes, neg_neighs, neg_gnew_neighs)
    sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
    grads = self.grad(self.network, weights)(u_id, pos_item_id, neg_item_id, pos_users, pos_items,
                                             u_group_nodes, u_neighs, u_gnew_neighs,
                                             i_group_nodes, i_neighs, i_gnew_neighs,
                                             neg_group_nodes, neg_neighs, neg_gnew_neighs,
                                             sens)
    return F.depend(loss, self.optimizer(grads))
def construct(self, x):
    if not self.training:
        return x

    if self.is_gpu:
        out, _ = self.dropout(x)
        return out

    if self.keep_prob == 1:
        return x

    shape = self.get_shape(x)
    dtype = P.DType()(x)
    if _is_float_dtype(dtype):
        keep_prob = self.cast(self.keep_prob, dtype)
    else:
        keep_prob = self.cast(self.keep_prob, mstype.float16)
    output = self.dropout_gen_mask(shape, keep_prob)
    return self.dropout_do_mask(x, output, keep_prob)
def __init__(self):
    super(LBeta, self).__init__()
    # const numbers
    self.log_2pi = np.log(2 * np.pi)
    self.minimax_coeff = [-0.165322962780713e-02,
                          0.837308034031215e-03,
                          -0.595202931351870e-03,
                          0.793650666825390e-03,
                          -0.277777777760991e-02,
                          0.833333333333333e-01]

    # operations
    self.log = P.Log()
    self.log1p = P.Log1p()
    self.less = P.Less()
    self.select = P.Select()
    self.shape = P.Shape()
    self.dtype = P.DType()
    self.lgamma = LGamma()