Example #1
    def __init__(self, weight, vocab_size, cell, batch_size):
        super(textrcnn, self).__init__()
        self.num_hiddens = 512
        self.embed_size = 300
        self.num_classes = 2
        self.batch_size = batch_size
        k = (1 / self.num_hiddens)**0.5  # uniform-init bound for the recurrent weights and biases

        self.embedding = nn.Embedding(vocab_size,
                                      self.embed_size,
                                      embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.cell = cell

        self.cast = P.Cast()

        self.h1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))
        self.c1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))

        if cell == "lstm":
            self.lstm = P.DynamicRNN(forget_bias=0.0)
            self.w1_fw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_fw")
            self.b1_fw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_fw")
            self.w1_bw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_bw")
            self.b1_bw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_bw")
            self.h1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))
            self.c1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))

        if cell == "vanilla":
            self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_fw = nn.Dense(self.embed_size, self.num_hiddens)
            self.rnnW_bw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_bw = nn.Dense(self.embed_size, self.num_hiddens)

        if cell == "gru":
            self.rnnWr_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWr_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.ones = Tensor(
                np.ones(shape=(self.batch_size,
                               self.num_hiddens)).astype(np.float16))
            self.rnnWr_fw.to_float(mstype.float16)
            self.rnnWz_fw.to_float(mstype.float16)
            self.rnnWh_fw.to_float(mstype.float16)
            self.rnnWr_bw.to_float(mstype.float16)
            self.rnnWz_bw.to_float(mstype.float16)
            self.rnnWh_bw.to_float(mstype.float16)

        self.transpose = P.Transpose()
        self.reduce_max = P.ReduceMax()
        self.expand_dims = P.ExpandDims()
        self.concat = P.Concat()

        self.reshape = P.Reshape()
        self.left_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.right_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.output_dense = nn.Dense(self.num_hiddens * 1, 2)
        self.concat0 = P.Concat(0)
        self.concat2 = P.Concat(2)
        self.concat1 = P.Concat(1)
        self.text_rep_dense = nn.Dense(2 * self.num_hiddens + self.embed_size,
                                       self.num_hiddens)
        self.mydense = nn.Dense(self.num_hiddens, 2)
        self.drop_out = nn.Dropout(keep_prob=0.7)
        self.tanh = P.Tanh()
        self.sigmoid = P.Sigmoid()
        self.slice = P.Slice()
        self.text_rep_dense.to_float(mstype.float16)
        self.mydense.to_float(mstype.float16)
        self.output_dense.to_float(mstype.float16)
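
The constructor above only declares the gate projections; the recurrence itself lives in the omitted construct method. Below is a minimal numpy sketch, not taken from the source, of the GRU step that the three Dense layers (rnnWr/rnnWz/rnnWh applied to the concatenation of hidden state and embedding) parameterize. Biases are omitted and the weight shapes are assumed to be (num_hiddens + embed_size, num_hiddens).

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(x_t, h_prev, Wr, Wz, Wh):
    """One GRU step: x_t is (batch, embed_size), h_prev is (batch, num_hiddens)."""
    hx = np.concatenate([h_prev, x_t], axis=1)   # (batch, num_hiddens + embed_size)
    r = sigmoid(hx @ Wr)                         # reset gate
    z = sigmoid(hx @ Wz)                         # update gate
    h_tilde = np.tanh(np.concatenate([r * h_prev, x_t], axis=1) @ Wh)
    return (1.0 - z) * h_prev + z * h_tilde      # new hidden state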
Example #2
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        self.batch_size = batch_size
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))

        self.matmul = P.MatMul(transpose_b=True)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        self.pad = P.Pad(((0, 23), (0, 23)))
        self.pad1 = P.Pad(((0, 7), (0, 7)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.dampingA = Tensor(np.identity(2048), mstype.float32)
        self.dampingG = Tensor(np.identity(1024), mstype.float32)
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
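
Dense_Thor caches the inverted second-moment factors that a THOR/KFAC-style optimizer consumes: matrix_A_inv for the layer input activations and matrix_G_inv for the output gradients, stored in the blocked 16x16 cube layout required by the Ascend kernels (CusMatMulCube, CusCholeskyTrsm). As a hedged numpy sketch of what each buffer conceptually holds (the factor, damping value and sizes below are placeholders, not values from the source):

import numpy as np

def damped_inverse(factor, damping):
    """Return (F + damping * I)^-1 for a square covariance factor F."""
    return np.linalg.inv(factor + damping * np.eye(factor.shape[0]))

# A would be the covariance of the layer inputs, G the covariance of the
# output gradients; 0.03 mirrors the default damping argument above.
a = np.random.randn(64, 8)
A = a.T @ a / a.shape[0]
A_inv = damped_inverse(A, damping=0.03)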
Example #3
 def __init__(self):
     super(SwitchLayerCell, self).__init__()
     self.layers = (Layer1(), Layer2())
     self.z3 = Parameter(
         Tensor(np.full([128, 96], 0.6, dtype=np.float32)), name='z3')
Example #4
 def __init__(self):
     super(Net, self).__init__()
     self.z = Parameter(Tensor(np.ones([2]).astype(np.float32)),
                        name='z')
Example #5
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)),
                                      name='matrix_A_inv', requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)),
                                      name="matrix_G_inv", requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.cube_matmul = P.MatMul(transpose_a=True)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        self.cholesky = P.Cholesky(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
Example #6
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], None)
        validator.check_number_range("loss_scale", loss_scale, 0.0,
                                     float("inf"), Rel.INC_NEITHER, None)

        if isinstance(weight_decay, int):
            weight_decay = float(weight_decay)
        validator.check_value_type("weight_decay", weight_decay, [float], None)
        validator.check_number_range("weight_decay", weight_decay, 0.0,
                                     float("inf"), Rel.INC_LEFT, None)

        self.is_group = False
        self.is_group_lr = False
        self.loss_scale = loss_scale
        if isinstance(learning_rate, float):
            self.dynamic_lr = False
            self.gather = None
            self.assignadd = None
            self.global_step = None
            self.scalar_lr = learning_rate
        else:
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')
            self.scalar_lr = None

        learning_rate = self._get_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        if self.is_group_lr:
            self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = Parameter(learning_rate, name="learning_rate")

        if self.is_group:
            self.parameters = ParameterTuple(self.params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.exec_weight_decay = any(self.decay_flags)
        self.param_length = len(self.parameters)
Example #7
 def __init__(self,):
     super(AssignSubNet, self).__init__()
     self.op = P.AssignSub()
     self.inputdata = Parameter(Tensor(np.zeros([1]).astype(np.bool_), mstype.bool_), name="assign_sub1")
Example #8
def test_parameter():
    x = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
    x.init_data()
    z = x / 2
    print(z)
Example #9
 def __init__(self):
     super(Net, self).__init__()
     self.all_gather = P.AllGather(group=NCCL_WORLD_COMM_GROUP)
     self.x = Parameter(initializer(Tensor(x), x.shape), name='x')
Example #10
    def __init__(self):
        super(Net, self).__init__()

        self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=3, strides=2)
        self.x = Parameter(initializer('normal', [1, 64, 112, 112]), name='w')
        self.add = P.TensorAdd()
Example #11
 def __init__(self, normalized_shape, eps=1e-5):
     super(LayerNorm, self).__init__()
     self.gamma = Parameter(initializer('ones', normalized_shape), name="gamma")
     self.beta = Parameter(initializer('zeros', normalized_shape), name="beta")
     self.mean = P.ReduceMean(keep_dims=True)
     self.eps = eps
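
The cell above declares the learnable gamma/beta and a keep-dims ReduceMean but omits the forward pass. A hedged numpy sketch of the normalization a construct() for this cell would compute over the last axis:

import numpy as np

def layer_norm(x, gamma, beta, eps=1e-5):
    # Normalize over the last axis, then scale by gamma and shift by beta.
    mean = x.mean(axis=-1, keepdims=True)
    var = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
    return gamma * (x - mean) / np.sqrt(var + eps) + beta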
Example #12
 def __init__(self):
     super().__init__()
     self.max = P.ReduceMax()
     self.param = Parameter(Tensor(np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32), name="weight")
     self.zero = Tensor(np.zeros(([2, 2, 2])), ms.float32)
Example #13
    def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError("Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
        validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr is only known after _preprocess_single_lr and _init_group_params have run
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.param_length = len(self.parameters)
        self.map_ = C.Map()

        use_parallel = context.get_auto_parallel_context("enable_parallel_optimizer")
        self.use_parallel = use_parallel
        if use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError("Optimizer segmentation does not support optimizer {}".format(self.cls_name))
            if _get_parallel_mode() != ParallelMode.DATA_PARALLEL:
                raise RuntimeError("Optimizer segmentation does not support parallel mode {}".format
                                   (_get_parallel_mode()))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError("Optimizer segmentation can not be applied when the number of parameters {} is"
                                   " less than the number of devices {}".format(self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True,) * self.param_length
Example #14
 def __iter__(self):
     cont = 0
     while cont < 3:
         cont += 1
         yield Parameter(Tensor(cont), name="cont")
Example #15
 def __init__(self):
     super(Net, self).__init__()
     self.mul = P.Mul()
     self.x = Parameter(initializer(Tensor(x), x.shape), name='x3')
     self.y = Parameter(initializer(Tensor(y), y.shape), name='y3')
Example #16
def _load_single_param(ckpt_file_name, param_name):
    """Load a parameter from checkpoint."""
    logger.info("Execute the process of loading checkpoint files.")
    checkpoint_list = Checkpoint()

    try:
        with open(ckpt_file_name, "rb") as f:
            pb_content = f.read()
        checkpoint_list.ParseFromString(pb_content)
    except BaseException as e:
        logger.error(
            "Failed to read the checkpoint file `%s`, please check the correct of the file.",
            ckpt_file_name)
        raise ValueError(e.__str__())

    parameter = None
    try:
        param_data_list = []
        for element_id, element in enumerate(checkpoint_list.value):
            if element.tag != param_name:
                continue
            data = element.tensor.tensor_content
            data_type = element.tensor.tensor_type
            np_type = tensor_to_np_type[data_type]
            ms_type = tensor_to_ms_type[data_type]
            element_data = np.frombuffer(data, np_type)
            param_data_list.append(element_data)
            if (element_id == len(checkpoint_list.value) - 1) or \
                    (element.tag != checkpoint_list.value[element_id + 1].tag):
                param_data = np.concatenate((param_data_list), axis=0)
                param_data_list.clear()
                dims = element.tensor.dims
                if dims == [0]:
                    if 'Float' in data_type:
                        param_data = float(param_data[0])
                    elif 'Int' in data_type:
                        param_data = int(param_data[0])
                    parameter = Parameter(Tensor(param_data, ms_type),
                                          name=element.tag)
                elif dims == [1]:
                    parameter = Parameter(Tensor(param_data, ms_type),
                                          name=element.tag)
                else:
                    param_dim = []
                    for dim in dims:
                        param_dim.append(dim)
                    param_value = param_data.reshape(param_dim)
                    parameter = Parameter(Tensor(param_value, ms_type),
                                          name=element.tag)
            if parameter is not None:
                break
        logger.info("Loading checkpoint files process is finished.")

    except BaseException as e:
        logger.error("Failed to load the checkpoint file `%s`.",
                     ckpt_file_name)
        raise RuntimeError(e.__str__())

    if parameter is None:
        raise ValueError(
            f"There is no parameter named {param_name} in this checkpoint file {ckpt_file_name}, "
            f"please check parameter name or checkpoint file.")
    return parameter
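
A hedged usage sketch; the checkpoint path and parameter name below are placeholders, not taken from the source:

param = _load_single_param("./checkpoint/lenet-1_32.ckpt", "conv1.weight")
print(param.name, param.data.shape)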
Example #17
    def _init_group_params(self, parameters, learning_rate, weight_decay):
        """Init learning rate or weight decay in group params."""
        origin_dynamic_lr = self.dynamic_lr
        if self.dynamic_lr:
            dynamic_lr_length = learning_rate.size()
        else:
            dynamic_lr_length = 0

        for group_param in parameters:
            lr_length = dynamic_lr_length
            if 'lr' in group_param.keys():
                self.is_group_lr = True
                self._get_single_lr(group_param['lr'])
                if isinstance(group_param['lr'], Iterable):
                    lr_length = len(group_param['lr'])
                    self.dynamic_lr = True
                elif isinstance(group_param['lr'], Tensor):
                    lr_length = group_param['lr'].size()
                    self.dynamic_lr = True
            if dynamic_lr_length not in (lr_length, 0):
                raise ValueError(
                    "The dynamic learning rate in group should be the same size."
                )
            dynamic_lr_length = lr_length

        if self.dynamic_lr and not origin_dynamic_lr:
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        params_store = []
        for group_param in parameters:
            self.params += group_param['params']
            if 'lr' in group_param.keys():
                params_dynamic_lr = isinstance(group_param['lr'],
                                               (Iterable, Tensor))

                if self.dynamic_lr and not params_dynamic_lr:
                    lr = Tensor(
                        np.array([group_param['lr']] *
                                 dynamic_lr_length).astype(np.float32))
                else:
                    lr = self._get_single_lr(group_param['lr'])
            else:
                if self.dynamic_lr and not origin_dynamic_lr:
                    lr = Tensor(
                        np.array([self.scalar_lr] * dynamic_lr_length).astype(
                            np.float32))
                else:
                    lr = learning_rate

            if 'weight_decay' in group_param.keys():
                validator.check_float_legal_value('weight_decay',
                                                  group_param['weight_decay'],
                                                  None)
                validator.check_number_range('weight_decay',
                                             group_param['weight_decay'], 0.0,
                                             float("inf"), Rel.INC_LEFT,
                                             self.cls_name)
                weight_decay_ = group_param['weight_decay'] * self.loss_scale
            else:
                weight_decay_ = weight_decay * self.loss_scale

            for key in group_param.keys():
                if key not in ('params', 'lr', 'weight_decay'):
                    logger.warning(
                        f"The optimizer cannot parse '{key}' when setting parameter groups."
                    )

            for param in group_param['params']:
                validator.check_value_type("parameter", param, [Parameter],
                                           self.cls_name)
                if param.name in params_store:
                    raise RuntimeError(
                        f"The {param.name} parameter has appeared in parameter groups."
                    )
                params_store.append(param.name)
                self.group_lr.append(Parameter(lr, name="lr_" + param.name))
                self.group_weight_decay.append(weight_decay_)
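
For reference, a hedged sketch of the grouped-parameter structure this method parses. Only the keys 'params', 'lr' and 'weight_decay' are recognized (any other key triggers the warning above); the parameter names are placeholders, and nn.Momentum merely stands in for a concrete Optimizer subclass.

import numpy as np
from mindspore import Parameter, Tensor, nn

# Placeholder parameters standing in for a real network's trainable_params().
conv_w = Parameter(Tensor(np.ones([3, 3]).astype(np.float32)), name='conv.weight')
fc_w = Parameter(Tensor(np.ones([3, 3]).astype(np.float32)), name='fc.weight')

group_params = [
    {'params': [conv_w], 'lr': 0.01, 'weight_decay': 1e-4},  # per-group lr and decay
    {'params': [fc_w], 'weight_decay': 0.0},                 # falls back to the default lr
]
optimizer = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9)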
Example #18
def load_checkpoint(ckpt_file_name,
                    net=None,
                    strict_load=False,
                    filter_prefix=None):
    """
    Loads checkpoint info from a specified file.

    Args:
        ckpt_file_name (str): Checkpoint file name.
        net (Cell): Cell network. Default: None
        strict_load (bool): Whether to load the parameters into the net strictly. If False, parameters in
                           param_dict are loaded into the net by matching names with the same suffix. Default: False
        filter_prefix (Union[str, list[str], tuple[str]]): Parameters starting with the filter_prefix
            will not be loaded. Default: None.

    Returns:
        Dict, key is parameter name, value is a Parameter.

    Raises:
        ValueError: Checkpoint file is incorrect.

    Examples:
        >>> ckpt_file_name = "./checkpoint/LeNet5-1_32.ckpt"
        >>> param_dict = load_checkpoint(ckpt_file_name, filter_prefix="conv1")
    """
    ckpt_file_name, filter_prefix = _check_checkpoint_param(
        ckpt_file_name, filter_prefix)
    logger.info("Execute the process of loading checkpoint files.")
    checkpoint_list = Checkpoint()

    try:
        with open(ckpt_file_name, "rb") as f:
            pb_content = f.read()
        checkpoint_list.ParseFromString(pb_content)
    except BaseException as e:
        logger.error(
            "Failed to read the checkpoint file `%s`, please check the correct of the file.",
            ckpt_file_name)
        raise ValueError(e.__str__())

    parameter_dict = {}
    try:
        param_data_list = []
        for element_id, element in enumerate(checkpoint_list.value):
            if filter_prefix is not None and _check_param_prefix(
                    filter_prefix, element.tag):
                continue
            data = element.tensor.tensor_content
            data_type = element.tensor.tensor_type
            np_type = tensor_to_np_type[data_type]
            ms_type = tensor_to_ms_type[data_type]
            element_data = np.frombuffer(data, np_type)
            param_data_list.append(element_data)
            if (element_id == len(checkpoint_list.value) - 1) or \
                    (element.tag != checkpoint_list.value[element_id + 1].tag):
                param_data = np.concatenate((param_data_list), axis=0)
                param_data_list.clear()
                dims = element.tensor.dims
                if dims == [0]:
                    if 'Float' in data_type:
                        param_data = float(param_data[0])
                    elif 'Int' in data_type:
                        param_data = int(param_data[0])
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_data, ms_type),
                                                            name=element.tag)
                elif dims == [1]:
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_data, ms_type),
                                                            name=element.tag)
                else:
                    param_dim = []
                    for dim in dims:
                        param_dim.append(dim)
                    param_value = param_data.reshape(param_dim)
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_value, ms_type),
                                                            name=element.tag)

        logger.info("Loading checkpoint files process is finished.")

    except BaseException as e:
        logger.error("Failed to load the checkpoint file `%s`.",
                     ckpt_file_name)
        raise RuntimeError(e.__str__())

    if not parameter_dict:
        raise ValueError(
            f"The loaded parameter dict is empty after filtering, please check filter_prefix."
        )

    if net is not None:
        load_param_into_net(net, parameter_dict, strict_load)

    return parameter_dict
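
Beyond the docstring example, a hedged sketch of loading the filtered dict first and applying it to a network in a separate step; the checkpoint path and net are placeholders, and load_param_into_net is the same helper the function calls when net is passed directly:

param_dict = load_checkpoint("./checkpoint/resnet-90_195.ckpt", filter_prefix="moments")
load_param_into_net(net, param_dict, strict_load=False)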
Example #19
 def __init__(self, max_cycles=10):
     super(ForwardNet, self).__init__()
     self.max_cycles = max_cycles
     self.i = Tensor(np.array(0), mstype.int32)
     self.zero = Tensor(np.array(0), mstype.int32)
     self.weight = Parameter(Tensor(np.array(0), mstype.int32))
Example #20
def merge_sliced_parameter(sliced_parameters, strategy=None):
    """
    Merge parameter slices into one whole parameter.

    Args:
        sliced_parameters (list[Parameter]): Parameter slices in order of rank_id.
        strategy (dict): Parameter slice strategy. Default: None.
            If strategy is None, the parameter slices are simply concatenated along axis 0.

            - key (str): Parameter name.
            - value (<class 'node_strategy_pb2.ParallelLayouts'>): Slice strategy of this parameter.

    Returns:
        Parameter, the merged parameter which has the whole data.

    Raises:
        ValueError: Failed to merge.
        TypeError: The sliced_parameters is incorrect or strategy is not dict.
        KeyError: The parameter name is not in keys of strategy.

    Examples:
        >>> sliced_parameters = [
        ...                      Parameter(Tensor(np.array([0.00023915, 0.00013939, -0.00098059])),
        ...                                "network.embedding_table"),
        ...                      Parameter(Tensor(np.array([0.00015815, 0.00015458, -0.00012125])),
        ...                                "network.embedding_table"),
        ...                      Parameter(Tensor(np.array([0.00042165, 0.00029692, -0.00007941])),
        ...                                "network.embedding_table"),
        ...                      Parameter(Tensor(np.array([0.00084451, 0.00089960, -0.00010431])),
        ...                                "network.embedding_table")]
        >>> merged_parameter = merge_sliced_parameter(sliced_parameters)
    """
    if not isinstance(sliced_parameters, list):
        raise TypeError(
            f"The sliced_parameters should be list, but got {type(sliced_parameters)}."
        )

    if not sliced_parameters:
        raise ValueError("The sliced_parameters should not be empty.")

    if strategy and not isinstance(strategy, dict):
        raise TypeError(
            f"The strategy should be dict, but got {type(strategy)}.")

    try:
        parameter_name = sliced_parameters[0].name
        parameter_shape = sliced_parameters[0].data.shape
        parameter_shape_length = len(parameter_shape)
    except BaseException as e:
        raise TypeError(
            f"{e.__str__()}. the element in sliced_parameters should be Parameter."
        )

    is_even = True
    for index, parameter in enumerate(sliced_parameters):
        if not isinstance(parameter, Parameter):
            raise TypeError(
                f"The element in sliced_parameters should be Parameter, "
                f"but got {type(parameter)} at index {index}.")

        if parameter.name != parameter_name \
                or len(parameter.data.shape) != parameter_shape_length \
                or parameter.data.shape[1:] != parameter_shape[1:]:
            raise ValueError(
                "Please make sure that the elements in slice_parameters have the same name, "
                "dimension length and shape except 0 axis")

        if parameter.data.shape != parameter_shape:
            is_even = False

    layerwise_parallel = sliced_parameters[0].layerwise_parallel
    requires_grad = sliced_parameters[0].requires_grad
    sliced_data = [parameter.data.asnumpy() for parameter in sliced_parameters]
    merged_parameter = None

    if not strategy:
        merged_tensor = Tensor(np.concatenate(sliced_data))
        merged_parameter = Parameter(merged_tensor, parameter_name,
                                     requires_grad, layerwise_parallel)

    else:
        if parameter_name not in strategy.keys():
            raise KeyError(
                f"The parameter name should be one key of strategy. "
                f"the parameter name is {parameter_name}.")
        merged_tensor = _merge_param_with_strategy(sliced_data, parameter_name,
                                                   strategy, is_even)
        merged_parameter = Parameter(merged_tensor, parameter_name,
                                     requires_grad, layerwise_parallel)

    return merged_parameter
Example #21
 def __init__(self):
     super(Net, self).__init__()
     self.z = Parameter(Tensor(np.array([1.0], np.float32)), name='z')
Example #22
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 transposed=False):
        super(_Conv, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        if isinstance(padding, int):
            Validator.check_integer('padding', padding, 0, Rel.GE,
                                    self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_integer('padding item', pad, 0, Rel.GE,
                                        self.cls_name)
            self.padding = padding
        else:
            raise TypeError(
                "padding type must be int/tuple(int) cannot be {}!".format(
                    type(padding)))

        self.dilation = dilation
        self.group = Validator.check_positive_int(group)
        self.has_bias = has_bias
        if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError(
                "Attr 'kernel_size' of 'Conv2D' Op passed " +
                str(self.kernel_size) +
                ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
                isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
            raise ValueError(
                "Attr 'stride' of 'Conv2D' Op passed " + str(self.stride) +
                ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
                isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
            raise ValueError(
                "Attr 'dilation' of 'Conv2D' Op passed " + str(self.dilation) +
                ", should be a int or tuple and equal to or greater than 1.")
        if in_channels % group != 0:
            raise ValueError(
                "Attr 'in_channels' of 'Conv2D' Op must be divisible by "
                "attr 'group' of 'Conv2D' Op.")
        if out_channels % group != 0:
            raise ValueError(
                "Attr 'out_channels' of 'Conv2D' Op must be divisible by "
                "attr 'group' of 'Conv2D' Op.")
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, in_channels // group, *kernel_size]
        self.weight = Parameter(initializer(self.weight_init, shape),
                                name='weight')

        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]),
                                  name='bias')
        else:
            if self.bias_init != 'zeros':
                logger.warning(
                    "Value of 'has_bias' is False, value of 'bias_init' will be ignored."
                )
            self.bias = None
Example #23
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        self.thor = True
        self.hw = kernel_size * kernel_size
        kernel_size = twice(kernel_size)
        super(Conv2d_Thor_GPU, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group
                               )

        self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels

        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.matrix_A_dim, self.matrix_G_dim, split_dim)
        self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32),
                                      name='matrix_A_inv', requires_grad=False)
        self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32),
                                      name='matrix_G_inv', requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
        self.matmul = P.MatMul(transpose_b=True)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.transpose = P.Transpose()
        self.cast = P.Cast()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.sqrt = P.Sqrt()
        self.reduce_mean = P.ReduceMean(keep_dims=False)
        self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
        self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
        self.cholesky = P.Cholesky(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
Example #24
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        Validator.check_value_type('padding', padding, (int, tuple),
                                   self.cls_name)
        if isinstance(padding, tuple):
            Validator.check_integer('padding size', len(padding), 4, Rel.EQ,
                                    self.cls_name)
        # out_channels and in_channels are swapped:
        # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # so Conv2dTranspose's out_channel maps to Conv2DBackpropInput's in_channel.
        super(Conv2dTranspose, self).__init__(in_channels,
                                              out_channels,
                                              kernel_size,
                                              stride,
                                              pad_mode,
                                              padding,
                                              dilation,
                                              group,
                                              has_bias,
                                              weight_init,
                                              bias_init,
                                              transposed=True)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError(
                'Attr \'pad_mode\' of \'Conv2dTranspose\' Op passed ' +
                str(pad_mode) +
                ', should be one of values in \'valid\', \'same\', \'pad\'.')
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name='bias')

        # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        if isinstance(self.padding, int):
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (
                self.padding, ) * 4
        else:
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding
Example #25
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        self.thor = True
        ksizes = (1, kernel_size, kernel_size, 1)
        self.hw = kernel_size * kernel_size
        strides = (1, stride, stride, 1)
        kernel_size = twice(kernel_size)
        super(Conv2d_Thor, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group
                               )
        self.batch_size = batch_size
        self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.transpose02314 = P.CusTranspose02314()
        self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels
        self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
                                                                                     self.in_channels, True)
        self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim,
                                                                                     self.in_channels, False)
        self.matrix_A_device_temp_shape = (
            self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1],
            self.matrix_A_device_shape[3])
        self.matrix_G_device_temp_shape = (
            self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1],
            self.matrix_G_device_shape[3])
        self.matrix_A_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
            name='matrix_A_inv', requires_grad=False)
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.matrix_G_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
            name="matrix_G_inv", requires_grad=False)

        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fake_G = Tensor(
            np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))

        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.vector_matmul = P.CusBatchMatMul()
        self.diag_block_dim = 128
        self.channels_slice_flag = False
        if self.in_channels % C0 != 0:
            self.channels_slice_flag = True

        self.padA_flag = False
        if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
            and self.matrix_A_dim > self.diag_block_dim:
            self.padA_flag = True
            pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
            self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
        self.device_shape_pad_flag = False
        if self.matrix_A_dim != self.matrix_A_device_dim:
            self.device_shape_pad_flag = True
            self.device_shape_pad = P.Pad(((0, 0), (0, C0 - self.in_channels), (0, 0), (0, C0 - self.in_channels)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.axis = 0

        dampingA_dim = self.matrix_A_dim
        if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
            dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
        dampingG_dim = self.matrix_G_dim
        if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
            dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim

        self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
        self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
Example #26
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        Validator.check_value_type("kernel_size", kernel_size, [int],
                                   self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_integer('kernel_size', kernel_size, 1, Rel.GE,
                                self.cls_name)
        Validator.check_integer('stride', stride, 1, Rel.GE, self.cls_name)
        Validator.check_integer('padding', padding, 0, Rel.GE, self.cls_name)
        Validator.check_integer('dilation', dilation, 1, Rel.GE, self.cls_name)
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_integer('weight_init_shape',
                                    len(weight_init_shape), 3, Rel.EQ,
                                    self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels are swapped:
        # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # so Conv1dTranspose's out_channel maps to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(in_channels,
                                              out_channels,
                                              kernel_size,
                                              stride,
                                              pad_mode,
                                              padding,
                                              dilation,
                                              group,
                                              has_bias,
                                              weight_init,
                                              bias_init,
                                              transposed=True)
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError(
                'Attr \'pad_mode\' of \'Conv1dTranspose\' Op passed ' +
                str(pad_mode) +
                ', should be one of values in \'valid\', \'same\', \'pad\'.')
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name='bias')

        # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
Example #27
 def __init__(self):
     super(Layer2, self).__init__()
     self.z2 = Parameter(
         Tensor(np.full([128, 96], 0.6, dtype=np.float32)), name='z2')
Example #28
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))

        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.moving_mean = Parameter(initializer(moving_mean_init,
                                                 num_features),
                                     name="mean",
                                     requires_grad=False)
        self.moving_variance = Parameter(initializer(moving_var_init,
                                                     num_features),
                                         name="variance",
                                         requires_grad=False)
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma",
                               requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta",
                              requires_grad=affine)
        self.group = check_int_positive(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i),
                                            self.rank_list[i])
                    self.all_reduce = P.AllReduce(
                        P.ReduceOp.SUM,
                        'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False

        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
        self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
        self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
        self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
        self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().set_strategy(
            data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().set_strategy(
            data_parallel_strategy)
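
Note that self.momentum is stored as 1.0 - momentum, so the sub/mul/assign_sub operators declared above implement the usual exponential moving average of the batch statistics. A hedged sketch of that update in plain Python:

def update_moving_stat(moving_stat, batch_stat, momentum=0.9):
    # moving_stat <- momentum * moving_stat + (1 - momentum) * batch_stat
    return moving_stat - (1.0 - momentum) * (moving_stat - batch_stat)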
Example #29
 def __init__(self):
     super().__init__()
     self.param_a = Parameter(Tensor(5, mstype.int32), name='a')
     self.param_b = Parameter(Tensor(4, mstype.int32), name='b')
Example #30
 def __init__(self, strategy1, strategy2):
     super().__init__()
     self.param = Parameter(initializer("zeros", [64, 64]), name="param")
     self.cell = Net(strategy1, strategy2, self.param)