示例#1
0
 def __init__(self, network, optimizer, sens=1.0):
     """Initialise gradient-accumulation state on top of the parent cell.

     Args:
         network: Forwarded unchanged to the parent initializer.
         optimizer: Forwarded unchanged to the parent initializer.
         sens: Forwarded unchanged to the parent initializer. Default: 1.0.
     """
     super(TrainAccuStepsCell, self).__init__(network, optimizer, sens)

     # Accumulation starts switched off.
     self.accumulation = False

     # The step count comes from the auto-parallel context, not from an argument.
     accu_steps = context.get_auto_parallel_context("grad_accumulation_step")
     self.accumulation_steps = accu_steps

     # Zero-initialised clone of self.weights, named with the "accu_grads" prefix.
     # NOTE(review): self.weights is presumably set by the parent initializer -- confirm.
     self.accu_grads = self.weights.clone(prefix="accu_grads", init="zeros")
     self.hyper_map = ops.HyperMap()
 def __init__(self, network, optimizer, scale_update_cell=None):
     """Build the training cell together with its loss-scale bookkeeping.

     Args:
         network: Cell to train; its trainable parameters are collected into
             ``self.weights``.
         optimizer: Optimizer; its ``parameters`` are handed to the gradient
             reducer when the parallel mode is data or hybrid parallel.
         scale_update_cell: Optional loss-scale manager. When truthy, its
             ``get_loss_scale()`` value initialises the ``loss_scale``
             parameter. Default: None.
     """
     super(BertTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)

     # Core training objects.
     self.network = network
     self.weights = ParameterTuple(network.trainable_params())
     self.optimizer = optimizer
     self.grad = ops.GradOperation(get_by_list=True, sens_param=True)

     # Parallel configuration: a reducer is only needed for data/hybrid parallel.
     self.allreduce = ops.AllReduce()
     self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
     self.reducer_flag = self.parallel_mode in (
         ParallelMode.DATA_PARALLEL,
         ParallelMode.HYBRID_PARALLEL,
     )

     # Defaults for the non-reduced case; replaced below when reducing.
     self.grad_reducer = ops.identity
     self.degree = 1
     if self.reducer_flag:
         self.degree = get_group_size()
         # Second positional argument is fixed to False here.
         self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree)
     self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)

     # Operators stored for later use by the rest of the class.
     self.cast = ops.Cast()
     self.alloc_status = ops.NPUAllocFloatStatus()
     self.get_status = ops.NPUGetFloatStatus()
     self.clear_before_grad = ops.NPUClearFloatStatus()
     self.reduce_sum = ops.ReduceSum(keep_dims=False)
     self.depend_parameter_use = ops.ControlDepend(depend_mode=1)
     self.base = Tensor(1, mstype.float32)
     self.less_equal = ops.LessEqual()
     self.hyper_map = ops.HyperMap()

     # loss_scale stays None unless a loss-scale manager was supplied.
     self.loss_scale = None
     self.loss_scaling_manager = scale_update_cell
     if scale_update_cell:
         initial_scale = Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32)
         self.loss_scale = Parameter(initial_scale, name="loss_scale")
示例#3
0
 def __init__(self, network, optimizer, grad_sum, sens=1.0):
     """Store the network, optimizer and gradient buffers for this cell.

     Args:
         network: Cell to differentiate. ``set_grad()`` is called on it and
             it is flagged with ``defer_inline=True``.
         optimizer: Stored as ``self.optimizer``.
         grad_sum: Stored as ``self.grad_sum``.
         sens: Stored as ``self.sens``. Default: 1.0.
     """
     super(TrainForwardBackward, self).__init__(auto_prefix=False)

     # Register the network, then prepare it for gradient computation.
     self.network = network
     network.set_grad()
     network.add_flags(defer_inline=True)

     self.weights = ParameterTuple(network.trainable_params())
     self.optimizer = optimizer
     self.grad_sum = grad_sum

     # Gradients are taken w.r.t. a parameter list, with an explicit sens input.
     self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
     self.sens = sens
     self.hyper_map = ops.HyperMap()
    def __init__(self, network, optimizer, sens=1.0):
        """Set up the one-step training cell, with a gradient reducer if needed.

        Args:
            network: Cell to train; its trainable parameters are collected
                into ``self.weights``.
            optimizer: Optimizer; its ``parameters`` are handed to the
                gradient reducer when the parallel mode is data or hybrid
                parallel.
            sens: Stored as ``self.sens``. Default: 1.0.
        """
        super(BertTrainOneStepCell, self).__init__(auto_prefix=False)

        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens

        # A reducer is only created for data-parallel or hybrid-parallel runs.
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        self.reducer_flag = self.parallel_mode in (
            ParallelMode.DATA_PARALLEL,
            ParallelMode.HYBRID_PARALLEL,
        )

        self.grad_reducer = None
        if self.reducer_flag:
            use_mean = context.get_auto_parallel_context("mirror_mean")
            group_size = get_group_size()
            self.grad_reducer = DistributedGradReducer(optimizer.parameters, use_mean, group_size)

        self.cast = ops.Cast()
        self.hyper_map = ops.HyperMap()
示例#5
0
    def __init__(self, network, optimizer, scale_update_cell=None):
        """Set up the training cell with device-specific overflow operators.

        The parent initializer is called with the same three arguments, and
        the attributes below are then assigned again in this method.

        Args:
            network: Cell to train; its trainable parameters are collected
                into ``self.weights``.
            optimizer: Optimizer; its ``parameters`` are handed to the
                gradient reducer when the parallel mode is data or hybrid
                parallel.
            scale_update_cell: Optional loss-scale manager. When truthy, its
                ``get_loss_scale()`` value initialises the ``loss_scale``
                parameter. Default: None.
        """
        super(BertPoetryCell, self).__init__(network, optimizer, scale_update_cell)

        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)

        # Parallel configuration: a reducer is only needed for data/hybrid parallel.
        self.allreduce = ops.AllReduce()
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        self.reducer_flag = self.parallel_mode in (
            ParallelMode.DATA_PARALLEL,
            ParallelMode.HYBRID_PARALLEL,
        )

        self.grad_reducer = None
        if self.reducer_flag:
            use_mean = context.get_auto_parallel_context("mirror_mean")
            group_size = get_group_size()
            self.grad_reducer = DistributedGradReducer(optimizer.parameters, use_mean, group_size)
        self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
        self.cast = ops.Cast()

        # The "GPU" device target gets FloatStatus/AddN/Reshape; any other
        # target gets the NPU float-status operators instead.
        self.gpu_target = context.get_context("device_target") == "GPU"
        if self.gpu_target:
            self.float_status = ops.FloatStatus()
            self.addn = ops.AddN()
            self.reshape = ops.Reshape()
        else:
            self.alloc_status = ops.NPUAllocFloatStatus()
            self.get_status = ops.NPUGetFloatStatus()
            self.clear_before_grad = ops.NPUClearFloatStatus()

        self.reduce_sum = ops.ReduceSum(keep_dims=False)
        self.base = Tensor(1, mstype.float32)
        self.less_equal = ops.LessEqual()
        self.hyper_map = ops.HyperMap()

        # loss_scale stays None unless a loss-scale manager was supplied.
        self.loss_scale = None
        self.loss_scaling_manager = scale_update_cell
        if scale_update_cell:
            initial_scale = Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32)
            self.loss_scale = Parameter(initial_scale, name="loss_scale")
示例#6
0
 def __init__(self, grad_sum, zeros):
     """Keep references to the gradient buffers and their zero counterparts.

     Args:
         grad_sum: Stored as ``self.grad_sum``.
         zeros: Stored as ``self.zeros``.
     """
     super(TrainClear, self).__init__(auto_prefix=False)

     self.grad_sum = grad_sum
     self.zeros = zeros

     # Stored for use by the rest of the class.
     self.hyper_map = ops.HyperMap()
示例#7
0
 def __init__(self):
     """Initialise the cell and create its ``HyperMap`` operator."""
     super(GradScale, self).__init__()

     # The only state this initializer adds.
     self.hyper_map = ops.HyperMap()
示例#8
0
 def __init__(self, clip_norm=1.0):
     """Initialise the cell with a clipping threshold.

     Args:
         clip_norm: Stored as ``self.clip_norm``. Default: 1.0.
     """
     super(ClipByNorm, self).__init__()

     self.clip_norm = clip_norm
     self.hyper_map = ops.HyperMap()