def test_square_normal():
    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
    x_np = np.random.rand(2, 3, 4, 4).astype(np.float32)
    output_ms = P.Square()(Tensor(x_np))
    output_np = np.square(x_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    x_np = np.random.rand(2, 3, 1, 5, 4, 4).astype(np.float32)
    output_ms = P.Square()(Tensor(x_np))
    output_np = np.square(x_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    x_np = np.random.rand(2,).astype(np.float32)
    output_ms = P.Square()(Tensor(x_np))
    output_np = np.square(x_np)
    assert np.allclose(output_ms.asnumpy(), output_np)
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Geometric"): """ Constructor of Geometric distribution. """ param = dict(locals()) super(Geometric, self).__init__(dtype, name, param) if probs is not None: self._probs = cast_to_tensor(probs, dtype=mstype.float32) check_prob(self._probs) else: self._probs = probs self.minval = np.finfo(np.float).tiny # ops needed for the class self.const = P.ScalarToArray() self.dtypeop = P.DType() self.fill = P.Fill() self.floor = P.Floor() self.issubclass = P.IsSubClass() self.less = P.Less() self.log = P.Log() self.pow = P.Pow() self.select = P.Select() self.shape = P.Shape() self.sq = P.Square() self.sqrt = P.Sqrt() self.uniform = P.UniformReal(seed=seed)
def __init__(self, rate=None, seed=0, dtype=mstype.float32, name="Exponential"): """ Constructor of Exponential distribution. """ param = dict(locals()) super(Exponential, self).__init__(dtype, name, param) if rate is not None: self._rate = cast_to_tensor(rate, mstype.float32) check_greater_zero(self._rate, "rate") else: self._rate = rate self.minval = np.finfo(np.float).tiny # ops needed for the class self.const = P.ScalarToArray() self.dtypeop = P.DType() self.exp = P.Exp() self.fill = P.Fill() self.less = P.Less() self.log = P.Log() self.select = P.Select() self.shape = P.Shape() self.sqrt = P.Sqrt() self.sq = P.Square() self.uniform = P.UniformReal(seed=seed)
def __init__(self, generator, variational):
    super().__init__()
    self.generator = generator
    self.variational = variational
    self.reshape_op = P.Reshape()
    self.reduce_mean = P.ReduceMean(keep_dims=False)
    self.square = P.Square()
def __init__(self, low=None, high=None, seed=0, dtype=mstype.float32, name="Uniform"):
    """
    Constructor of Uniform distribution.
    """
    param = dict(locals())
    super(Uniform, self).__init__(dtype, name, param)
    if low is not None and high is not None:
        self._low = convert_to_batch(low, self._broadcast_shape, dtype)
        self._high = convert_to_batch(high, self._broadcast_shape, dtype)
        check_greater(self.low, self.high, "low value", "high value")
    else:
        self._low = low
        self._high = high

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.exp = P.Exp()
    self.fill = P.Fill()
    self.less = P.Less()
    self.lessequal = P.LessEqual()
    self.log = P.Log()
    self.logicaland = P.LogicalAnd()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = P.UniformReal(seed=seed)
    self.zeroslike = P.ZerosLike()
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Bernoulli"): """ Constructor of Bernoulli distribution. """ param = dict(locals()) valid_dtype = mstype.int_type + mstype.uint_type + mstype.float_type check_type(dtype, valid_dtype, type(self).__name__) super(Bernoulli, self).__init__(seed, dtype, name, param) self.parameter_type = mstype.float32 if probs is not None: self._probs = cast_to_tensor(probs, mstype.float32) check_prob(self.probs) else: self._probs = probs # ops needed for the class self.exp = exp_generic self.log = log_generic self.erf = erf_generic self.squeeze = P.Squeeze(0) self.cast = P.Cast() self.const = P.ScalarToArray() self.dtypeop = P.DType() self.floor = P.Floor() self.fill = P.Fill() self.less = P.Less() self.shape = P.Shape() self.select = P.Select() self.sq = P.Square() self.sqrt = P.Sqrt() self.uniform = C.uniform
def __init__(self, mean=None, sd=None, seed=0, dtype=mstype.float32, name="Normal"):
    """
    Constructor of normal distribution.
    """
    param = dict(locals())
    super(Normal, self).__init__(dtype, name, param)
    if mean is not None and sd is not None:
        self._mean_value = convert_to_batch(mean, self._broadcast_shape, dtype)
        self._sd_value = convert_to_batch(sd, self._broadcast_shape, dtype)
        check_greater_equal_zero(self._sd_value, "Standard deviation")
    else:
        self._mean_value = mean
        self._sd_value = sd
    self.seed = seed

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.erf = P.Erf()
    self.exp = P.Exp()
    self.expm1 = self._expm1_by_step
    self.fill = P.Fill()
    self.log = P.Log()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
def test_square():
    """ test_square """
    input_tensor = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))
    square = P.Square()
    result = square(input_tensor)
    expect = np.array([[1, 4, 9], [16, 25, 36]])
    assert np.all(result.asnumpy() == expect)
def __init__(self, loc=None, scale=None, seed=0, dtype=mstype.float32, name="LogNormal"):
    """
    Constructor of LogNormal distribution.
    """
    super(LogNormal, self).__init__(distribution=msd.Normal(loc, scale, dtype=dtype),
                                    bijector=msb.Exp(),
                                    seed=seed, name=name)
    self.log_2pi = np.log(2 * np.pi)

    # ops needed for the class
    self.exp = exp_generic
    self.expm1 = expm1_generic
    self.log = log_generic
    self.const = P.ScalarToArray()
    self.erf = P.Erf()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
def __init__(self, rate=None, seed=0, dtype=mstype.float32, name="Exponential"): """ Constructor of Exponential distribution. """ param = dict(locals()) valid_dtype = mstype.float_type check_type(dtype, valid_dtype, type(self).__name__) super(Exponential, self).__init__(seed, dtype, name, param) self.parameter_type = dtype if rate is not None: self._rate = cast_to_tensor(rate, self.parameter_type) check_greater_zero(self._rate, "rate") else: self._rate = rate self.minval = np.finfo(np.float).tiny # ops needed for the class self.exp = exp_generic self.log = log_generic self.squeeze = P.Squeeze(0) self.cast = P.Cast() self.const = P.ScalarToArray() self.dtypeop = P.DType() self.fill = P.Fill() self.less = P.Less() self.select = P.Select() self.shape = P.Shape() self.sqrt = P.Sqrt() self.sq = P.Square() self.uniform = C.uniform
def __init__(self, mean=None, sd=None, seed=0, dtype=mstype.float32, name="Normal"):
    """
    Constructor of normal distribution.
    """
    param = dict(locals())
    super(Normal, self).__init__(dtype, name, param)
    if mean is not None and sd is not None:
        self._mean_value = convert_to_batch(mean, self._broadcast_shape, dtype)
        self._sd_value = convert_to_batch(sd, self._broadcast_shape, dtype)
        check_greater_equal_zero(self._sd_value, "Standard deviation")
    else:
        self._mean_value = mean
        self._sd_value = sd

    # ops needed for the class
    self.exp = P.Exp()
    self.add = P.TensorAdd()
    self.mul = P.Mul()
    self.sq = P.Square()
    self.log = P.Log()
    self.sqrt = P.Sqrt()
    self.realdiv = P.RealDiv()
    self.expm1 = P.Expm1() if get_context('device_target') == 'Ascend' else self._expm1_by_step
    self.normal = P.Normal(seed=seed)
    self.shape = P.Shape()
    self.zeroslike = P.ZerosLike()
    self.const = P.ScalarToArray()
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Bernoulli"): """ Constructor of Bernoulli distribution. """ param = dict(locals()) super(Bernoulli, self).__init__(dtype, name, param) if probs is not None: self._probs = cast_to_tensor(probs, dtype=mstype.float32) check_prob(self.probs) else: self._probs = probs self.seed = seed # ops needed for the class self.cast = P.Cast() self.const = P.ScalarToArray() self.dtypeop = P.DType() self.erf = P.Erf() self.fill = P.Fill() self.log = P.Log() self.less = P.Less() self.shape = P.Shape() self.select = P.Select() self.sq = P.Square() self.sqrt = P.Sqrt() self.uniform = P.UniformReal(seed=seed)
def __init__(self, loc=None, scale=None, seed=0, dtype=mstype.float32, name="LogNormal"):
    """
    Constructor of LogNormal distribution.
    """
    super(LogNormal, self).__init__(distribution=msd.Normal(loc, scale, dtype=dtype),
                                    bijector=msb.Exp(),
                                    seed=seed, name=name)

    # overwrite default_parameters and parameter_names
    self._reset_parameters()
    self._loc = self._add_parameter(loc, 'loc')
    self._scale = self._add_parameter(scale, 'scale')

    self.log_2pi = np.log(2 * np.pi)

    # ops needed for the class
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expm1 = P.Expm1()
    self.log = log_generic
    self.const = P.ScalarToArray()
    self.erf = P.Erf()
    self.fill = P.Fill()
    self.greater = P.Greater()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.squeeze = P.Squeeze(0)
def __init__(self, mean=None, sd=None, seed=None, dtype=mstype.float32, name="Normal"):
    """
    Constructor of Normal.
    """
    param = dict(locals())
    param['param_dict'] = {'mean': mean, 'sd': sd}
    valid_dtype = mstype.float_type
    Validator.check_type(type(self).__name__, dtype, valid_dtype)
    super(Normal, self).__init__(seed, dtype, name, param)

    self._mean_value = self._add_parameter(mean, 'mean')
    self._sd_value = self._add_parameter(sd, 'sd')
    if self._sd_value is not None:
        check_greater_zero(self._sd_value, "Standard deviation")

    # ops needed for the class
    self.exp = exp_generic
    self.expm1 = expm1_generic
    self.log = log_generic
    self.erf = P.Erf()
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
def _update_run_op_for_map(beta1, beta2, eps, lr, weight_decay_tensor, param,
                           m, v, gradient, decay_flag):
    """Apply an Adam-style update with optional weight decay to a single parameter."""
    op_mul = P.Mul()
    op_square = P.Square()
    op_sqrt = P.Sqrt()
    op_cast = P.Cast()
    op_reshape = P.Reshape()
    op_shape = P.Shape()

    param_fp32 = op_cast(param, mstype.float32)
    m_fp32 = op_cast(m, mstype.float32)
    v_fp32 = op_cast(v, mstype.float32)
    gradient_fp32 = op_cast(gradient, mstype.float32)

    next_m = op_mul(beta1, m_fp32) + op_mul(
        op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)

    next_v = op_mul(beta2, v_fp32) + op_mul(
        op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32))

    update = next_m / (op_sqrt(next_v) + eps)
    if decay_flag:
        update = update + op_mul(weight_decay_tensor, param_fp32)

    update_with_lr = op_mul(lr, update)
    next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32))

    next_v = F.depend(next_v, F.assign(param, next_param))
    next_v = F.depend(next_v, F.assign(m, next_m))
    next_v = F.depend(next_v, F.assign(v, next_v))
    return next_v
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, weight_decay=0.0,
             loss_scale=1.0, num_hidden_layers=24, batch_size=12, damping=0.03,
             decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.matmul = P.MatMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.gather = P.GatherV2()
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.num_hidden_layers = num_hidden_layers
    self.sqrt = P.Sqrt()
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.expand = P.ExpandDims()
    self.square = P.Square()
    self.inv = P.Inv()
    self.batch_size = batch_size
    self.damping = damping
    self.one = Tensor(1, mstype.int32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
def __init__(self, mean=None, sd=None, seed=0, dtype=mstype.float32, name="Normal"):
    """
    Constructor of normal distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.float_type
    check_type(dtype, valid_dtype, type(self).__name__)
    super(Normal, self).__init__(seed, dtype, name, param)
    self.parameter_type = dtype
    if mean is not None and sd is not None:
        self._mean_value = cast_to_tensor(mean, self.parameter_type)
        self._sd_value = cast_to_tensor(sd, self.parameter_type)
        check_greater_zero(self._sd_value, "Standard deviation")
    else:
        self._mean_value = mean
        self._sd_value = sd

    # ops needed for the class
    self.exp = exp_generic
    self.expm1 = expm1_generic
    self.log = log_generic
    self.erf = erf_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
def __init__(self, config):
    super(WideDeepModel, self).__init__()
    self.batch_size = config.batch_size
    self.field_size = config.field_size
    self.vocab_size = config.vocab_size
    self.emb_dim = config.emb_dim
    self.deep_layer_args = config.deep_layer_args
    self.deep_layer_dims_list, self.deep_layer_act = self.deep_layer_args
    self.init_args = config.init_args
    self.weight_init, self.bias_init = config.weight_bias_init
    self.weight_bias_init = config.weight_bias_init
    self.emb_init = config.emb_init
    self.drop_out = config.dropout_flag
    self.keep_prob = config.keep_prob
    self.deep_input_dims = self.field_size * self.emb_dim
    self.layer_dims = self.deep_layer_dims_list + [1]
    self.all_dim_list = [self.deep_input_dims] + self.layer_dims

    init_acts = [('Wide_w', [self.vocab_size, 1], self.emb_init),
                 ('V_l2', [self.vocab_size, self.emb_dim], self.emb_init),
                 ('Wide_b', [1], self.emb_init)]
    var_map = init_var_dict(self.init_args, init_acts)
    self.wide_w = var_map["Wide_w"]
    self.wide_b = var_map["Wide_b"]
    self.embedding_table = var_map["V_l2"]

    self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1],
                                    self.weight_bias_init, self.deep_layer_act, convert_dtype=True)
    self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2],
                                    self.weight_bias_init, self.deep_layer_act, convert_dtype=True)
    self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3],
                                    self.weight_bias_init, self.deep_layer_act, convert_dtype=True)
    self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4],
                                    self.weight_bias_init, self.deep_layer_act, convert_dtype=True)
    self.dense_layer_5 = DenseLayer(self.all_dim_list[4], self.all_dim_list[5],
                                    self.weight_bias_init, self.deep_layer_act, convert_dtype=True)

    self.gather_v2 = P.GatherV2()
    self.mul = P.Mul()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.reshape = P.Reshape()
    self.square = P.Square()
    self.shape = P.Shape()
    self.tile = P.Tile()
    self.concat = P.Concat(axis=1)
    self.cast = P.Cast()
def __init__(self, seed, dtype, name, param):
    """
    Constructor of distribution class.
    """
    super(Distribution, self).__init__()
    if seed is None:
        seed = 0
    validator.check_value_type('name', name, [str], type(self).__name__)
    validator.check_non_negative_int(seed, 'seed', name)

    self._name = name
    self._seed = seed
    self._dtype = cast_type_for_device(dtype)
    self._parameters = {}

    # parsing parameters
    for k in param.keys():
        if not(k == 'self' or k.startswith('_')):
            self._parameters[k] = param[k]

    # some attributes
    if 'distribution' in self.parameters.keys():
        self.parameter_type = self.parameters['distribution'].parameter_type
    else:
        self.parameter_type = set_param_type(self.parameters['param_dict'], dtype)
    self._broadcast_shape = self._calc_broadcast_shape()
    self._is_scalar_batch = self._check_is_scalar_batch()

    # set the function to call according to the derived class's attributes
    self._set_prob()
    self._set_log_prob()
    self._set_sd()
    self._set_var()
    self._set_cdf()
    self._set_survival()
    self._set_log_cdf()
    self._set_log_survival()
    self._set_cross_entropy()

    self.context_mode = context.get_context('mode')
    self.device_target = context.get_context('device_target')
    self.checktuple = CheckTuple()
    self.checktensor = CheckTensor()
    self.broadcast = broadcast_to

    # ops needed for the base class
    self.cast_base = P.Cast()
    self.dtype_base = P.DType()
    self.exp_base = exp_generic
    self.fill_base = P.Fill()
    self.log_base = log_generic
    self.sametypeshape_base = P.SameTypeShape()
    self.sq_base = P.Square()
    self.sqrt_base = P.Sqrt()
    self.shape_base = P.Shape()
def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_filter):
    """
    Update parameters.

    Args:
        beta1 (Tensor): The exponential decay rate for the 1st moment estimations.
            Should be in range (0.0, 1.0).
        beta2 (Tensor): The exponential decay rate for the 2nd moment estimations.
            Should be in range (0.0, 1.0).
        eps (Tensor): Term added to the denominator to improve numerical stability.
            Should be greater than 0.
        lr (Tensor): Learning rate.
        weight_decay (Number): Weight decay. Should be equal to or greater than 0.
        param (Tensor): Parameters.
        m (Tensor): m value of parameters.
        v (Tensor): v value of parameters.
        gradient (Tensor): Gradient of parameters.
        decay_flag (bool): Applies weight decay or not.
        optim_filter (bool): Applies parameter update or not.

    Returns:
        Tensor, the new value of v after updating.
    """
    if optim_filter:
        op_mul = P.Mul()
        op_square = P.Square()
        op_sqrt = P.Sqrt()
        op_cast = P.Cast()
        op_reshape = P.Reshape()
        op_shape = P.Shape()

        param_fp32 = op_cast(param, mstype.float32)
        m_fp32 = op_cast(m, mstype.float32)
        v_fp32 = op_cast(v, mstype.float32)
        gradient_fp32 = op_cast(gradient, mstype.float32)

        next_m = op_mul(beta1, m_fp32) + op_mul(
            op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)

        next_v = op_mul(beta2, v_fp32) + op_mul(
            op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32))

        update = next_m / (eps + op_sqrt(next_v))
        if decay_flag:
            update = op_mul(weight_decay, param_fp32) + update

        update_with_lr = op_mul(lr, update)
        next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32))

        next_param = F.depend(next_param, F.assign(param, op_cast(next_param, F.dtype(param))))
        next_param = F.depend(next_param, F.assign(m, op_cast(next_m, F.dtype(m))))
        next_param = F.depend(next_param, F.assign(v, op_cast(next_v, F.dtype(v))))

        return op_cast(next_param, F.dtype(param))
    return gradient
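# A minimal reference sketch (not part of the MindSpore source): the same Adam-with-weight-decay
# math that _update_run_op above implements, written with plain NumPy so the update rule is easy
# to read. The function name adamw_step and the NumPy framing are illustrative assumptions.
import numpy as np

def adamw_step(param, m, v, grad, beta1, beta2, eps, lr, weight_decay, decay_flag=True):
    """One Adam-with-weight-decay step mirroring the math of _update_run_op."""
    next_m = beta1 * m + (1.0 - beta1) * grad               # first-moment estimate
    next_v = beta2 * v + (1.0 - beta2) * np.square(grad)    # second-moment estimate
    update = next_m / (eps + np.sqrt(next_v))
    if decay_flag:
        update = weight_decay * param + update              # decoupled weight decay
    next_param = param - lr * update
    return next_param, next_m, next_v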
def __init__(self, network, l2_coef=1e-6):
    super(NetWithLossClass, self).__init__(auto_prefix=False)
    self.loss = P.SigmoidCrossEntropyWithLogits()
    self.network = network
    self.l2_coef = l2_coef
    self.Square = P.Square()
    self.ReduceMean_false = P.ReduceMean(keep_dims=False)
    self.ReduceSum_false = P.ReduceSum(keep_dims=False)
def construct(self, x, y, z):
    sub_res = self.sub(x, y)
    mul_res = self.mul(sub_res, x)
    sqrt_grad_res = self.sqrt_grad(mul_res, z)
    square_res = P.Square()(sqrt_grad_res)
    add_res = self.add(sqrt_grad_res, square_res)
    add1_res = self.add(add_res, add_res)
    return self.add(add1_res, add1_res)
def construct(self, x, y):
    sub_res = self.sub(x, y)
    mul_res = self.mul(sub_res, x)
    relu_res = self.relu(mul_res)
    square_res = P.Square()(relu_res)
    add_res = self.add(relu_res, square_res)
    add1_res = self.add(add_res, add_res)
    return self.add(add1_res, add1_res)
def __init__(self, network, config):
    super(NetWithLossClass, self).__init__(auto_prefix=False)
    self.network = network
    self.l2_coef = config.l2_coef
    self.loss = P.SigmoidCrossEntropyWithLogits()
    self.square = P.Square().set_strategy(((1, get_group_size()),))
    self.reduceMean_false = P.ReduceMean(keep_dims=False)
    self.reduceSum_false = P.ReduceSum(keep_dims=False)
def _update_run_op_graph_kernel(beta1, beta2, eps, global_step, lr, weight_decay,
                                param, m, v, gradient, decay_flag):
    """
    Update parameters.

    Args:
        beta1 (Tensor): The exponential decay rate for the 1st moment estimations.
            Should be in range (0.0, 1.0).
        beta2 (Tensor): The exponential decay rate for the 2nd moment estimations.
            Should be in range (0.0, 1.0).
        eps (Tensor): Term added to the denominator to improve numerical stability.
            Should be greater than 0.
        lr (Tensor): Learning rate.
        weight_decay (Number): Weight decay. Should be equal to or greater than 0.
        global_step (Tensor): Global step.
        param (Tensor): Parameters.
        m (Tensor): m value of parameters.
        v (Tensor): v value of parameters.
        gradient (Tensor): Gradient of parameters.
        decay_flag (bool): Specifies whether param update with weight decay.

    Returns:
        Tensor, the new value of v after updating.
    """
    op_mul = P.Mul()
    op_square = P.Square()
    op_cast = P.Cast()
    op_shape = P.Shape()
    op_pow = P.Pow()
    op_norm = layer.Norm()
    op_fill = P.Fill()
    op_dtype = P.DType()

    param_fp32 = op_cast(param, mstype.float32)
    gradient_fp32 = op_cast(gradient, mstype.float32)

    i6_ex = op_cast(global_step + num_one, mstype.float32)
    i9 = op_cast(num_one, mstype.float32) - beta1
    x1 = op_cast(num_one, mstype.float32) - beta2
    i6 = op_cast(num_one, mstype.float32) - op_pow(beta1, i6_ex)
    i3 = op_cast(num_one, mstype.float32) - op_pow(beta2, i6_ex)
    i1 = op_square(gradient_fp32)
    add3, update = G.LambNextMV()(i1, v, i3, gradient, m, i6, param,
                                  beta1, i9, beta2, x1, weight_decay, eps)

    if decay_flag:
        update = update + op_mul(weight_decay, param_fp32)

    w_norm = op_norm(param_fp32)
    g_norm = op_norm(gradient_fp32)
    g_norm_hat = op_norm(add3)
    zeros = F.zeros_like(w_norm)
    ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0)
    tens = op_fill(op_dtype(w_norm), op_shape(w_norm), 10.0)
    next_param = G.LambUpdateWithLR()(g_norm, w_norm, g_norm_hat, lr, update,
                                      param, zeros, ones, tens)
    next_v = F.control_depend(add3, next_param)
    return next_v
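# Reference sketch (assumption, not the MindSpore source): the published LAMB update rule that the
# fused LambNextMV / LambUpdateWithLR kernels above correspond to. The exact clipping and selection
# behaviour of the fused ops may differ; this only spells out the underlying math in NumPy.
import numpy as np

def lamb_step(param, m, v, grad, beta1, beta2, eps, lr, weight_decay, step):
    """One LAMB step: Adam moments, bias correction, then a trust-ratio scaled update."""
    next_m = beta1 * m + (1.0 - beta1) * grad
    next_v = beta2 * v + (1.0 - beta2) * np.square(grad)
    m_hat = next_m / (1.0 - beta1 ** step)                  # bias correction, cf. i6 above
    v_hat = next_v / (1.0 - beta2 ** step)                  # bias correction, cf. i3 above
    update = m_hat / (np.sqrt(v_hat) + eps) + weight_decay * param
    w_norm = np.linalg.norm(param)
    u_norm = np.linalg.norm(update)
    # trust ratio; assumed bounded roughly as suggested by the zeros/ones/tens arguments above
    trust_ratio = np.clip(w_norm / u_norm, 0.0, 10.0) if w_norm > 0 and u_norm > 0 else 1.0
    return param - lr * trust_ratio * update, next_m, next_v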
def __init__(self, probs=None, seed=None, dtype=mstype.int32, name="Categorical"):
    param = dict(locals())
    param['param_dict'] = {'probs': probs}
    valid_dtype = mstype.uint_type + mstype.int_type + mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Categorical, self).__init__(seed, dtype, name, param)

    self._probs = self._add_parameter(probs, 'probs')
    if self.probs is not None:
        check_rank(self.probs)
        check_prob(self.probs)
        check_sum_equal_one(probs)

        # update is_scalar_batch and broadcast_shape
        # drop one dimension
        if self.probs.shape[:-1] == ():
            self._is_scalar_batch = True
        self._broadcast_shape = self._broadcast_shape[:-1]

    self.argmax = P.ArgMaxWithValue(axis=-1)
    self.broadcast = broadcast_to
    self.cast = P.Cast()
    self.clip_by_value = C.clip_by_value
    self.concat = P.Concat(-1)
    self.cumsum = P.CumSum()
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expand_dim = P.ExpandDims()
    self.fill = P.Fill()
    self.gather = P.GatherNd()
    self.greater = P.Greater()
    self.issubclass = P.IsSubClass()
    self.less = P.Less()
    self.log = log_generic
    self.log_softmax = P.LogSoftmax()
    self.logicor = P.LogicalOr()
    self.logicand = P.LogicalAnd()
    self.multinomial = P.Multinomial(seed=self.seed)
    self.reshape = P.Reshape()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select = P.Select()
    self.shape = P.Shape()
    self.softmax = P.Softmax()
    self.squeeze = P.Squeeze()
    self.squeeze_first_axis = P.Squeeze(0)
    self.squeeze_last_axis = P.Squeeze(-1)
    self.square = P.Square()
    self.transpose = P.Transpose()
    self.is_nan = P.IsNan()

    self.index_type = mstype.int32
    self.nan = np.nan
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
                         beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, param,
                         m, v, ps_parameter, cache_enable):
    """Apply sparse adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices
    values = gradient.values
    if ps_parameter and not cache_enable:
        op_shape = P.Shape()
        shapes = (op_shape(param), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
                  op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices), shapes), param))
        return success

    if not target:
        success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices))
    else:
        op_mul = P.Mul()
        op_square = P.Square()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)

        success = F.depend(success, F.assign(m, op_mul(beta1, m)))
        success = F.depend(success, F.assign(v, op_mul(beta2, v)))

        grad_indices = gradient.indices
        grad_value = gradient.values

        next_m = scatter_add(m, grad_indices,
                             op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
        next_v = scatter_add(v, grad_indices,
                             op_mul(F.tuple_to_array((1.0,)) - beta2, op_square(grad_value)))

        if use_nesterov:
            m_temp = next_m * _scaler_ten
            F.assign(m, op_mul(beta1, next_m))
            div_value = scatter_add(m, op_mul(grad_indices, _scaler_one),
                                    op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
            param_update = div_value / (op_sqrt(next_v) + eps)
            F.assign(m, m_temp / _scaler_ten)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)

        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
        next_param = param - lr_t * param_update

        success = F.depend(success, F.assign(param, next_param))
        success = F.depend(success, F.assign(m, next_m))
        success = F.depend(success, F.assign(v, next_v))

    return success
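# Reference sketch (not part of the MindSpore source): the non-nesterov host branch of
# _run_opt_with_sparse above, with np.add.at standing in for ScatterAdd. The name
# sparse_adam_step is an illustrative assumption.
import numpy as np

def sparse_adam_step(param, m, v, indices, values, beta1, beta2, eps, lr,
                     beta1_power, beta2_power):
    """Sparse Adam step: decay both moments, then scatter the gradient rows into them."""
    m *= beta1                                               # F.assign(m, beta1 * m)
    v *= beta2                                               # F.assign(v, beta2 * v)
    np.add.at(m, indices, (1.0 - beta1) * values)            # ScatterAdd on the 1st moment
    np.add.at(v, indices, (1.0 - beta2) * np.square(values)) # ScatterAdd on the 2nd moment
    update = m / (np.sqrt(v) + eps)
    lr_t = lr * np.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)
    param -= lr_t * update
    return param, m, v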
def __init__(self):
    """"""
    super(Net, self).__init__()
    self.square = P.Square()
    self.add = P.TensorAdd()
    self.value = Tensor(3, dtype=ms.float32)
    self.switch = P.GeSwitch()
    self.merge = P.Merge()
    self.less = P.Less()
def __init__(self):
    super(openpose_loss, self).__init__()
    self.expand_dims = P.ExpandDims()
    self.tile = P.Tile()
    self.mul = P.Mul()
    self.l2_loss = P.L2Loss()
    self.square = P.Square()
    self.reduceMean = P.ReduceMean()
    self.reduceSum = P.ReduceSum()
    self.print = P.Print()
    self.shape = P.Shape()
    self.maxoftensor = P.ArgMaxWithValue(-1)
def __init__(self, config):
    super(DeepFMModel, self).__init__()

    self.batch_size = config.batch_size
    self.field_size = config.data_field_size
    self.vocab_size = config.data_vocab_size
    self.emb_dim = config.data_emb_dim
    self.deep_layer_dims_list, self.deep_layer_act = config.deep_layer_args
    self.init_args = config.init_args
    self.weight_bias_init = config.weight_bias_init
    self.keep_prob = config.keep_prob

    init_acts = [('W_l2', [self.vocab_size, 1], 'normal'),
                 ('V_l2', [self.vocab_size, self.emb_dim], 'normal'),
                 ('b', [1], 'normal')]
    var_map = init_var_dict(self.init_args, init_acts)
    self.fm_w = var_map["W_l2"]
    self.fm_b = var_map["b"]
    self.embedding_table = var_map["V_l2"]

    # Deep Layers
    self.deep_input_dims = self.field_size * self.emb_dim + 1
    self.all_dim_list = [self.deep_input_dims] + self.deep_layer_dims_list + [1]
    self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1],
                                    self.weight_bias_init, self.deep_layer_act, self.keep_prob)
    self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2],
                                    self.weight_bias_init, self.deep_layer_act, self.keep_prob)
    self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3],
                                    self.weight_bias_init, self.deep_layer_act, self.keep_prob)
    self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4],
                                    self.weight_bias_init, self.deep_layer_act, self.keep_prob)

    # Cross Layer
    self.cross_layer_1 = CrossLayer(self.field_size * self.emb_dim, self.weight_bias_init)
    self.cross_layer_2 = CrossLayer(self.field_size * self.emb_dim, self.weight_bias_init)

    # FM, linear Layers
    self.Gatherv2 = P.GatherV2()
    self.Mul = P.Mul()
    self.ReduceSum = P.ReduceSum(keep_dims=False)
    self.Reshape = P.Reshape()
    self.Square = P.Square()
    self.Shape = P.Shape()
    self.Tile = P.Tile()
    self.Concat = P.Concat(axis=1)
    self.Cast = P.Cast()