Example #1
 def __init__(self, in_channel, x):
     super().__init__()
     #self._save_graphs(save_graph_flag=True, save_graph_path=".")
     self.biasadd = P.BiasAdd()
     self.equal = P.Equal()
     self.addn = P.AddN()
     self.conv = Conv2d(in_channels=in_channel,
                        out_channels=in_channel,
                        kernel_size=1,
                        stride=1,
                        has_bias=False,
                        weight_init='ones',
                        pad_mode='same')
     self.bn = BatchNorm2d(num_features=in_channel)
     self.assignadd = P.AssignAdd()
     self.assign = P.Assign()
     self.relu = ReLU()
     self.mean = P.ReduceMean(keep_dims=False)
     self.bias = Parameter(Tensor(
          np.random.randint(2, size=(3, )).astype(np.float32)),
                           name="bias")
     self.bias2 = Parameter(Tensor(np.ones([3]).astype(np.float32)),
                            name="bias2")
     self.parameterupdate = ParameterUpdate(self.bias)
     self.value = Tensor(np.random.randn(*(3, )), ms.float32)
     self.x = x
Example #2
 def __init__(self):
     super().__init__()
     self.parameter1 = Parameter(Tensor([199.0], ms.float32), name="parameter1")
     self.assign = P.Assign()
     self.assignadd = P.AssignAdd()
     self.addn = P.AddN()
     self.depend = P.Depend()
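
A hedged sketch of how a construct method for this cell might chain these ops (the method and its input x are assumptions, not part of the original snippet): Depend forces the side-effecting AssignAdd to run before the parameter is read again.

 def construct(self, x):
     # force the in-place AssignAdd on parameter1 to execute first
     out = self.assignadd(self.parameter1, x)
     # make the subsequent read of parameter1 depend on that update
     x = self.depend(x, out)
     return self.addn((x, self.parameter1))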
Example #3
    def __init__(self,
                 params,
                 learning_rate,
                 beta1=0.9,
                 beta2=0.999,
                 eps=1e-6,
                 weight_decay=0.0):
        super(Lamb, self).__init__(learning_rate, params, weight_decay)
        _check_param_value(beta1, beta2, eps, self.cls_name)

        # turn these into scalars once scalar/tensor mixed operations are supported
        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
        self.eps = Tensor(np.array([eps]).astype(np.float32))
        self.params = self.parameters
        self.moments1 = self.params.clone(prefix="lamb_m", init='zeros')
        self.moments2 = self.params.clone(prefix="lamb_v", init='zeros')

        if not self.dynamic_lr:
            self.global_step = Parameter(initializer(0, [1]),
                                         name='global_step')
            self.assignadd = P.AssignAdd()
        self.hyper_map = C.HyperMap()
        self.enable_graph_kernel = context.get_context("enable_graph_kernel") and \
            context.get_context("device_target") == "Ascend"
Example #4
 def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0,
              decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
     super(Momentum, self).__init__(learning_rate, params)
     if isinstance(momentum, float) and momentum < 0.0:
         raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
     if isinstance(learning_rate, Iterable) or \
             (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
         self.dynamic_lr = True
         self.gather = P.GatherV2()
         self.assignadd = P.AssignAdd()
         self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
         self.axis = 0
     else:
         self.dynamic_lr = False
         self.gather = None
         self.assignadd = None
         self.global_step = None
         self.axis = None
     self.momentum = Parameter(momentum, name="momentum")
     self.params = self.parameters
     self.moments = self.params.clone(prefix="moments", init='zeros')
     self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyMomentum()
     self.weight_decay = weight_decay * loss_scale
     self.reciprocal_scale = 1.0 / loss_scale
     self.one = Tensor(1, mstype.int32)
Example #5
 def __init__(self,
              optimizer,
              epsilon=1e-05,
              hyperpara=0.001,
              weight_decay=0.0,
              use_clip=False,
              decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias'
              not in x.name,
              lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias'
              not in x.name,
              loss_scale=1.0):
     super(LARS, self).__init__(0.0,
                                [Parameter(Tensor(0.0), name="trivial")])
     self.opt = optimizer
     self.parameters = optimizer.parameters
     self.learning_rate = optimizer.learning_rate
     self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip)
     self.reciprocal_scale = 1.0 / loss_scale
     self.weight_decay = weight_decay * loss_scale
     self.cast = P.Cast()
     self.decay_flag = tuple(decay_filter(x) for x in self.parameters)
     self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.dynamic_lr = False
     self.gather = None
     self.global_step = None
     self.axis = None
     if isinstance(self.learning_rate.default_input, Iterable) or \
             (isinstance(self.learning_rate.default_input, Tensor) and self.learning_rate.default_input.dim() == 1):
         self.dynamic_lr = True
         self.assignadd = P.AssignAdd()
         self.gather = P.GatherV2()
         self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                      name="lars_global_step")
         self.axis = 0
Example #6
 def __init__(self):
     super().__init__()
     self.assign_sub = P.AssignAdd()
     self.mul = P.Mul()
     self.mul_weight = Parameter(Tensor(
         np.full([128, 32], 0.5, dtype=np.float32)),
                                 name="mul_weight")
     self.assignsub_weight = Parameter(Tensor(
         np.full([128, 32], 1.1, dtype=np.float32)),
                                       name="assignsub_weight")
Example #7
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0,
                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in
                 x.name):
        super(Optimizer, self).__init__()
        if isinstance(learning_rate, float):
            self.dynamic_lr = False
            self.gather = None
            self.assignadd = None
            self.global_step = None
            validator.check_number_range("learning rate", learning_rate, 0.0,
                                         float("inf"), Rel.INC_LEFT)
        else:
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')
            if isinstance(learning_rate, Iterable):
                learning_rate = Tensor(
                    np.array(list(learning_rate)).astype(np.float32))
            elif isinstance(learning_rate, Tensor):
                if learning_rate.dim() > 1:
                    raise ValueError(
                        "Learning rate should be a 0 or 1 dim `Tensor`,"
                        f"but got {learning_rate.dim()}.")
                if learning_rate.dim() == 1 and learning_rate.size() < 2:
                    logger.warning(
                        "If want to use the dynamic learning rate, please make sure that the number "
                        "of elements in the list, tuple or tensor passed is greater than 1."
                    )
            else:
                raise TypeError(
                    "Learning rate should be float, Tensor or Iterable.")

        if loss_scale <= 0.0:
            raise ValueError(
                "Loss scale should be greater than 0, but got {}".format(
                    loss_scale))
        if weight_decay < 0.0:
            raise ValueError(
                "Weight decay should be equal or greater than 0, but got {}".
                format(weight_decay))

        self.learning_rate = Parameter(learning_rate, name="learning_rate")
        self.parameters = ParameterTuple(parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.weight_decay = weight_decay * loss_scale
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)

        if not self.parameters:
            raise ValueError("optimizer got an empty parameter list.")
Example #8
    def __init__(self,
                 params,
                 learning_rate=0.1,
                 decay=0.9,
                 momentum=0.0,
                 epsilon=1e-10,
                 use_locking=False,
                 centered=False,
                 loss_scale=1.0,
                 weight_decay=0.0,
                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in
                 x.name):
        super(RMSProp, self).__init__(learning_rate, params)

        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError(
                "momentum should be at least 0.0, but got momentum {}".format(
                    momentum))

        if decay < 0.0:
            raise ValueError(
                "decay should be at least 0.0, but got dampening {}".format(
                    decay))
        self.decay = decay
        self.epsilon = epsilon

        validator.check_type("use_locking", use_locking, [bool])
        validator.check_type("centered", centered, [bool])
        self.centered = centered
        if centered:
            self.opt = P.ApplyCenteredRMSProp(use_locking)
            self.mg = self.parameters.clone(prefix="mean_grad", init='zeros')
        else:
            self.opt = P.ApplyRMSProp(use_locking)

        self.dynamic_lr = False
        if not isinstance(learning_rate, float):
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                         name="global_step")
            self.axis = 0
            self.one = Tensor(1, mstype.int32)

        self.momentum = momentum

        self.ms = self.parameters.clone(prefix="mean_square", init='zeros')
        self.moment = self.parameters.clone(prefix="moment", init='zeros')
        self.hyper_map = C.HyperMap()

        self.decay = decay
        self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.weight_decay = weight_decay * loss_scale
Example #9
    def __init__(self,
                 params,
                 learning_rate=0.1,
                 momentum=0.0,
                 dampening=0.0,
                 weight_decay=0.0,
                 nesterov=False,
                 loss_scale=1.0):

        super(SGD, self).__init__(learning_rate, params)

        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError(
                "momentum should be at least 0.0, but got momentum {}".format(
                    momentum))

        if dampening < 0.0:
            raise ValueError(
                "dampening should be at least 0.0, but got dampening {}".
                format(dampening))
        self.dampening = dampening

        if weight_decay < 0.0:
            raise ValueError(
                "weight_decay should be at least 0.0, but got weight_decay {}".
                format(weight_decay))
        self.weight_decay = weight_decay

        validator.check_type("nesterov", nesterov, [bool])
        self.nesterov = nesterov

        self.opt = P.SGD(dampening, weight_decay, nesterov)

        self.dynamic_lr = False
        self.gather = None
        self.global_step = None
        self.axis = None
        if not isinstance(learning_rate, float):
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                         name="global_step")
            self.axis = 0
        self.momentum = Parameter(momentum, name="momentum")
        self.params = self.parameters
        self.accum = self.params.clone(prefix="accum", init='zeros')
        self.stat = self.params.clone(prefix="stat", init='ones')
        self.hyper_map = C.HyperMap()

        self.weight_decay = weight_decay * loss_scale
        self.reciprocal_scale = 1.0 / loss_scale
Example #10
    def __init__(self,
                 params,
                 learning_rate=1e-3,
                 beta1=0.9,
                 beta2=0.999,
                 eps=1e-8,
                 use_locking=False,
                 use_nesterov=False,
                 weight_decay=0.0,
                 loss_scale=1.0,
                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in
                 x.name):
        super(Adam, self).__init__(learning_rate, params)
        _check_param_value(beta1, beta2, eps, weight_decay)
        validator.check_type("use_locking", use_locking, [bool])
        validator.check_type("use_nesterov", use_nesterov, [bool])
        validator.check_type("loss_scale", loss_scale, [float])
        validator.check_number_range("loss_scale", loss_scale, 1.0,
                                     float("inf"), Rel.INC_LEFT)

        self.dynamic_lr = False
        if isinstance(learning_rate, Iterable) or \
                (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                         name="global_step")
            self.axis = 0

        self.beta1 = Tensor(beta1, mstype.float32)
        self.beta2 = Tensor(beta2, mstype.float32)
        self.beta1_power = Parameter(initializer(1, [1], mstype.float32),
                                     name="beta1_power")
        self.beta2_power = Parameter(initializer(1, [1], mstype.float32),
                                     name="beta2_power")
        self.eps = eps

        self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
        self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')

        self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
        self.hyper_map = C.HyperMap()
        self.opt = P.Adam(use_locking, use_nesterov)
        self.weight_decay = weight_decay * loss_scale
        self.reciprocal_scale = 1.0 / loss_scale

        self.pow = P.Pow()
        self.sqrt = P.Sqrt()
        self.one = Tensor(np.array([1.0]).astype(np.float32))
        self.realdiv = P.RealDiv()
Example #11
    def _init_group_params(self, parameters, learning_rate, weight_decay):
        """Init learning rate or weight decay in group params."""
        origin_dynamic_lr = self.dynamic_lr
        self._parse_group_params(parameters, learning_rate)
        if self.dynamic_lr and not origin_dynamic_lr:
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

        params_store = []
        for group_param in parameters:
            if 'order_params' in group_param.keys():
                ordered_parameters = group_param['order_params']
                continue

            self.group_params += group_param['params']
            if 'lr' in group_param.keys():
                params_dynamic_lr = isinstance(group_param['lr'], (Iterable, Tensor))
                if self.dynamic_lr and not params_dynamic_lr:
                    lr = Tensor(np.array([group_param['lr']] * self.dynamic_lr_length).astype(np.float32))
                else:
                    lr = self._get_single_lr(group_param['lr'])
            else:
                if self.dynamic_lr and not origin_dynamic_lr:
                    lr = Tensor(np.array([self.scalar_lr] * self.dynamic_lr_length).astype(np.float32))
                else:
                    lr = learning_rate

            if 'weight_decay' in group_param.keys():
                validator.check_float_legal_value('weight_decay', group_param['weight_decay'], None)
                validator.check_number_range('weight_decay', group_param['weight_decay'], 0.0, float("inf"),
                                             Rel.INC_LEFT, self.cls_name)
                weight_decay_ = group_param['weight_decay'] * self.loss_scale
            else:
                weight_decay_ = weight_decay * self.loss_scale

            for key in group_param.keys():
                if key not in ('params', 'lr', 'weight_decay'):
                    logger.warning(f"The optimizer cannot parse '{key}' when setting parameter groups.")

            for param in group_param['params']:
                validator.check_value_type("parameter", param, [Parameter], self.cls_name)
                if param.name in params_store:
                    raise RuntimeError(f"The {param.name} parameter has appeared in parameter groups.")

                params_store.append(param.name)
                self.group_lr.append(Parameter(lr, name="lr_" + param.name))
                self.group_weight_decay.append(weight_decay_)

        if self.is_group_params_ordered:
            self._order_and_adjust_group_params(ordered_parameters, learning_rate, weight_decay)
Example #12
 def __init__(self):
     super(AssignAdd, self).__init__()
     self.add = P.AssignAdd()
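
Example #12 shows only the constructor. A minimal, self-contained sketch of how such a cell is usually completed and invoked is given below; the class name AssignAddDemo, the var parameter, and the call at the end are illustrative assumptions.

import numpy as np
from mindspore import nn, Tensor, Parameter
from mindspore.ops import operations as P

class AssignAddDemo(nn.Cell):
    def __init__(self):
        super(AssignAddDemo, self).__init__()
        self.add = P.AssignAdd()
        # AssignAdd updates a Parameter in place, so the accumulator must be a Parameter
        self.var = Parameter(Tensor(np.zeros([1]).astype(np.float32)), name="var")

    def construct(self, value):
        # adds value to self.var in place and returns the updated value
        return self.add(self.var, value)

net = AssignAddDemo()
out = net(Tensor(np.array([2.0]).astype(np.float32)))  # self.var is now [2.0]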
Example #13
 def __init__(self):
     super(Assign_RAW, self).__init__()
     self.assign_add = P.AssignAdd()
     self.greater = P.Greater()
     self.add = P.Add()
     self.para = Parameter(Tensor(1, dtype=ms.int32), name='para')
Example #14
def _cumulative_grad(grad_sum, grad):
    """Apply grad sum to cumulative gradient."""
    add = P.AssignAdd()
    return add(grad_sum, grad)
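
A hedged sketch of how a helper like this is typically wired into gradient accumulation: it is registered on a MultitypeFuncGraph and applied with HyperMap across the accumulator parameters and the incoming gradients. The names _sum_op, _accumulate_grad, grad_sum, and grads are illustrative assumptions, not taken from the example.

from mindspore.ops import composite as C
from mindspore.ops import operations as P

_sum_op = C.MultitypeFuncGraph("grad_sum_op")

@_sum_op.register("Tensor", "Tensor")
def _accumulate_grad(grad_sum, grad):
    """Add the current gradient to its running accumulator in place."""
    return P.AssignAdd()(grad_sum, grad)

# inside a training cell, accumulation then looks roughly like:
#   self.hyper_map = C.HyperMap()
#   self.grad_sum = ParameterTuple(...)  # zero-initialized clones of the weights
#   self.hyper_map(_sum_op, self.grad_sum, grads)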
Example #15
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))

        self.matmul = P.MatMul(transpose_b=True)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        self.pad = P.Pad(((0, 24), (0, 24)))
        self.pad1 = P.Pad(((0, 8), (0, 8)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fused_abs_max1 = P.CusFusedAbsMax1([1000, 1000])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.dampingA = Tensor(np.identity(2048), mstype.float32)
        self.dampingG = Tensor(np.identity(1024), mstype.float32)
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
Example #16
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=100,
                 has_bias=False,
                 activation=None,
                 batch_size=12):
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
                    weight_init.shape()[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init,
                                            [out_channels, in_channels]),
                                name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        self.matrix_A_inv = Parameter(Tensor(
            np.zeros([in_channels, in_channels]).astype(np.float16)),
                                      name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(
            np.zeros([out_channels, out_channels]).astype(np.float16)),
                                      name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(
            np.zeros([out_channels, out_channels]).astype(np.float16))

        self.matmul = P.MatMul(transpose_b=True)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  name="cov_step",
                                  requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = damping
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.abs = P.Abs()
        self.reduce_max = P.ReduceMax(keep_dims=False)
        self.log = P.Log()
        self.exp = P.Exp()
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.batch_size = batch_size
Example #17
 def __init__(self, value):
     super(AssignAdd, self).__init__()
     self.var = Parameter(value, name="var")
     self.add = P.AssignAdd()
Example #18
 def __init__(self):
     super(Net, self).__init__()
     self.AssignAdd = P.AssignAdd()
     self.inputdata = Parameter(initializer(1, [1], ms.int64),
                                name="global_step")
     print("inputdata: ", self.inputdata)
Example #19
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 eps=1e-5,
                 momentum=0.997,
                 weight_init=None,
                 beta_init=None,
                 gamma_init=None,
                 mean_init=None,
                 var_init=None,
                 quant_delay=0,
                 freeze_bn=100000,
                 fake=True,
                 num_bits=8,
                 per_channel=False,
                 symmetric=False,
                 narrow_range=False):
        super(Conv2dBatchNormQuant, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = twice(dilation)
        self.stride = twice(stride)
        self.group = group
        self.fake = fake
        self.freeze_bn = freeze_bn
        self.momentum = momentum
        self.quant_delay = quant_delay
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        if weight_init is None:
            weight_init = initializer(
                'normal',
                [out_channels, in_channels // group, *self.kernel_size])
        self.weight = Parameter(weight_init, name='weight')
        if gamma_init is None:
            gamma_init = initializer('ones', [out_channels])
        self.gamma = Parameter(gamma_init, name='gamma')
        if beta_init is None:
            beta_init = initializer('zeros', [out_channels])
        self.beta = Parameter(beta_init, name='beta')
        if mean_init is None:
            mean_init = initializer('zeros', [out_channels])
        self.moving_mean = Parameter(mean_init,
                                     name='moving_mean',
                                     requires_grad=False)
        if var_init is None:
            var_init = initializer('ones', [out_channels])
        self.moving_variance = Parameter(var_init,
                                         name='moving_variance',
                                         requires_grad=False)

        self.step = Parameter(initializer('normal', [1], dtype=mstype.int32),
                              name='step',
                              requires_grad=False)

        self.conv = P.Conv2D(out_channel=self.out_channels,
                             kernel_size=self.kernel_size,
                             mode=1,
                             pad_mode=self.pad_mode,
                             pad=self.padding,
                             stride=self.stride,
                             dilation=self.dilation,
                             group=self.group)
        self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                     max_init=6,
                                                     ema=False,
                                                     num_bits=num_bits,
                                                     quant_delay=quant_delay,
                                                     per_channel=per_channel,
                                                     out_channels=out_channels,
                                                     symmetric=symmetric,
                                                     narrow_range=narrow_range)
        self.batchnorm_fold_train = P.BatchNormFold(epsilon=eps,
                                                    momentum=momentum,
                                                    is_training=True,
                                                    freeze_bn=freeze_bn)
        self.batchnorm_fold_infer = P.BatchNormFold(epsilon=eps,
                                                    momentum=momentum,
                                                    is_training=False,
                                                    freeze_bn=freeze_bn)
        self.correct_mul = P.CorrectionMul()
        self.relu = P.ReLU()
        self.batchnorm_fold2 = P.BatchNormFold2(freeze_bn=freeze_bn)
        self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0)
        self.one = Tensor(1, mstype.int32)
        self.assignadd = P.AssignAdd()
Example #20
 def __init__(self, para):
     super(AssignAddNet, self).__init__()
     self.para = Parameter(para, name="para")
     self.assign_add = P.AssignAdd()
Example #21
 def __init__(self):
     super(AssignAddNet, self).__init__()
     self.op = P.AssignAdd()
     self.inputdata = Parameter(Tensor(np.zeros([1]).astype(np.bool_), mstype.bool_), name="assign_add1")
Example #22
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], None)
        validator.check_number_range("loss_scale", loss_scale, 0.0,
                                     float("inf"), Rel.INC_NEITHER, None)

        if isinstance(weight_decay, int):
            weight_decay = float(weight_decay)
        validator.check_value_type("weight_decay", weight_decay, [float], None)
        validator.check_number_range("weight_decay", weight_decay, 0.0,
                                     float("inf"), Rel.INC_LEFT, None)

        self.is_group = False
        self.is_group_lr = False
        self.loss_scale = loss_scale
        if isinstance(learning_rate, float):
            self.dynamic_lr = False
            self.gather = None
            self.assignadd = None
            self.global_step = None
            self.scalar_lr = learning_rate
        else:
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')
            self.scalar_lr = None

        learning_rate = self._get_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        if self.is_group_lr:
            self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = Parameter(learning_rate, name="learning_rate")

        if self.is_group:
            self.parameters = ParameterTuple(self.params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.exec_weight_decay = any(self.decay_flags)
        self.param_length = len(self.parameters)
Example #23
    ('StridedSlice_2_Error', {
        'block': (lambda x: P.StridedSlice(end_mask="1"), {
            'exception': TypeError
        }),
        'desc_inputs': [0]
    }),
    ('StridedSlice_3_Error', {
        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {
            'exception': TypeError
        }),
        'desc_inputs': [0]
    }),
    ('StridedSlice_4_Error', {
        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {
            'exception': TypeError
        }),
        'desc_inputs': [0]
    }),
    ('AssignAdd_Error', {
        'block': (P.AssignAdd(), {
            'exception': IndexError
        }),
        'desc_inputs': [[1]]
    }),
]


@mindspore_test(pipeline_for_verify_exception_for_case_by_case_config)
def test_check_exception():
    return raise_set
Example #24
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)
        self.grad_centralization = False

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self.group_grad_centralization = []
            self._init_group_params(parameters, learning_rate, weight_decay,
                                    self.grad_centralization)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            self.learning_rate = CellList(
                self.group_lr) if self.dynamic_lr else ParameterTuple(
                    self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')

        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
            self.grad_centralization_flags = tuple(
                self.group_grad_centralization)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # when a parameter has been unique, there is no need do another unique in optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        self._use_parallel_optimizer()
Example #25
 def __init__(self):
     super().__init__()
     self.op = P.AssignAdd()
     self.inputdata = Parameter(initializer(1, [1], ms.float32),
                                name="global_step")
Example #26
@non_graph_engine
@mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config)
def test_exec():
    context.set_context(mode=context.GRAPH_MODE)
    return test_exec_case


raise_set = [
    ('StridedSlice_1_Error', {
        'block': (lambda x: P.StridedSlice(begin_mask="1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_2_Error', {
        'block': (lambda x: P.StridedSlice(end_mask="1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_3_Error', {
        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('StridedSlice_4_Error', {
        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}),
        'desc_inputs': [0]}),
    ('AssignAdd_Error', {
        'block': (P.AssignAdd(), {'exception': ValueError}),
        'desc_inputs': [[1]]}),
]


@mindspore_test(pipeline_for_verify_exception_for_case_by_case_config)
def test_check_exception():
    return raise_set
Example #27
 def __init__(self):
     super(AssignAddNet, self).__init__()
     self.AssignAdd = P.AssignAdd()
     self.inputdata = Parameter(initializer(1, [1], ms.float16),
                                name="KIND_AUTOCAST_SCALAR_TO_TENSOR")
     self.one = 1
Example #28
    def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError("Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
        validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.param_length = len(self.parameters)
        self.map_ = C.Map()

        use_parallel = context.get_auto_parallel_context("enable_parallel_optimizer")
        self.use_parallel = use_parallel
        if use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError("Optimizer segmentation does not support optimizer {}".format(self.cls_name))
            if _get_parallel_mode() != ParallelMode.DATA_PARALLEL:
                raise RuntimeError("Optimizer segmentation does not support parallel mode {}".format
                                   (_get_parallel_mode()))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError("Optimizer segmentation can not be applied when the number of parameters {} is"
                                   " less than the number of devices {}".format(self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True,) * self.param_length
Example #29
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # when a parameter has been unique, there is no need do another unique in optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        ps_cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(ps_cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        if context.get_auto_parallel_context("enable_parallel_optimizer"):
            if _get_parallel_mode(
            ) == ParallelMode.DATA_PARALLEL and context.get_context(
                    "device_target") == "Ascend":
                self.use_parallel = True
            elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                    and context.get_context("device_target") != "Ascend":
                raise RuntimeError(
                    "Parallel optimizer only supports Ascend in data parallel mode."
                )
            elif _get_parallel_mode() in (ParallelMode.STAND_ALONE,
                                          ParallelMode.HYBRID_PARALLEL):
                raise RuntimeError(
                    "Parallel optimizer is not supported in {}.".format(
                        _get_parallel_mode()))
            else:
                self.use_parallel = False
        else:
            self.use_parallel = False
        if self.use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError(
                    "Parallel optimizer does not support optimizer {}".format(
                        self.cls_name))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError(
                    "Parallel optimizer can not be applied when the number of parameters {} is"
                    " less than the number of devices {}".format(
                        self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(
                map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True, ) * self.param_length
Example #30
from mindspore.ops import Primitive
from mindspore.ops import operations as P
from mindspore.ops import _constants as Constants

depend = P.Depend()
all_reduce = P.AllReduce()
broadcast = P.Broadcast(1)
tensor_move = Primitive('TensorMove')
make_tuple = Primitive('MakeTuple')
tuple_getitem = Primitive(Constants.kTupleGetItem)
assign_add = P.AssignAdd()
apply_momentum = P.ApplyMomentum()
relu = P.ReLU()


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]
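
FnDict is a small registry: calling the instance as a decorator stores each function under its own name, and indexing retrieves it by name. A hedged usage sketch following the pattern these test files typically use; test_assign_add_graph and its body are illustrative assumptions.

def test_assign_add_graph(tag):
    fns = FnDict()

    @fns
    def before(x, y):
        # build a small graph that accumulates y into x with the assign_add primitive above
        return assign_add(x, y)

    return fns[tag]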