Example #1
from mindspore.ops import composite as C

def arithmetic_simplify_02(x, y):
    """ arithmetic_simplify_02 """
    # ones_like(x) * y: a multiply-by-ones pattern that simplification can fold
    return C.ones_like(x) * y
Example #2
from mindspore.ops import composite as C

def arithmetic_simplify_03(x, y):
    """ arithmetic_simplify_03 """
    # x * ones_like(y): the mirrored multiply-by-ones pattern
    return x * C.ones_like(y)
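Both patterns are identities once the ones tensor is folded: ones_like(x) * y is elementwise y (broadcast to x's shape), and x * ones_like(y) is elementwise x. A minimal sketch of the first pattern, assuming MindSpore is installed (the Cell wrapper and tensor values are illustrative, not part of the original tests):

import numpy as np
from mindspore import Tensor, nn
from mindspore.ops import composite as C

class OnesLikeMul(nn.Cell):
    """Wraps the pattern so it can run in graph or PyNative mode."""
    def construct(self, x, y):
        return C.ones_like(x) * y

x = Tensor(np.array([1.0, 2.0, 3.0], np.float32))
y = Tensor(np.array([4.0, 5.0, 6.0], np.float32))
print(OnesLikeMul()(x, y))  # [4. 5. 6.] -- identical to y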
Example #3
    def construct(self, data, label, sens=None):
        """
        construct a compute flow.
        """
        init = False
        if not self.gpu_target:
            # init overflow buffer
            init = self.alloc_status()
            # clear overflow buffer
            self.clear_status(init)

        if sens is None:
            scaling_sens = self.loss_scale
        else:
            scaling_sens = sens

        # differential privacy: split the batch into micro-batches so that
        # gradients can be clipped (and later noised) per micro-batch
        weights = self.weights
        record_datas = self._split(data)
        record_labels = self._split(label)
        # first micro-batch
        loss = self.network(record_datas[0], record_labels[0])
        scaling_sens_filled = C.ones_like(loss) * F.cast(
            scaling_sens, F.dtype(loss))
        record_grad = self.grad(self.network,
                                weights)(record_datas[0], record_labels[0],
                                         scaling_sens_filled)

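        # beta counts, per micro-batch, whether the gradient's global norm is
        # below the clipping bound; it later drives the adaptive clip mechanism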
        beta = self._zero
        square_sum = self._zero
        for grad in record_grad:
            square_sum = self._add(square_sum,
                                   self._reduce_sum(self._square_all(grad)))
        norm_grad = self._sqrt(square_sum)
        beta = self._add(
            beta,
            self._cast(self._less(norm_grad, self._norm_bound),
                       mstype.float32))
        record_grad = self._clip_by_global_norm(record_grad,
                                                GRADIENT_CLIP_TYPE,
                                                self._norm_bound)
        grads = record_grad
        total_loss = loss
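        # remaining micro-batches: repeat the gradient, norm and clipping
        # steps, accumulating gradients and the total loss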
        for i in range(1, self._micro_batches):
            loss = self.network(record_datas[i], record_labels[i])
            scaling_sens_filled = C.ones_like(loss) * F.cast(
                scaling_sens, F.dtype(loss))
            record_grad = self.grad(self.network,
                                    weights)(record_datas[i], record_labels[i],
                                             scaling_sens_filled)

            square_sum = self._zero
            for grad in record_grad:
                square_sum = self._add(
                    square_sum, self._reduce_sum(self._square_all(grad)))
            norm_grad = self._sqrt(square_sum)
            beta = self._add(
                beta,
                self._cast(self._less(norm_grad, self._norm_bound),
                           mstype.float32))

            record_grad = self._clip_by_global_norm(record_grad,
                                                    GRADIENT_CLIP_TYPE,
                                                    self._norm_bound)
            grads = self._tuple_add(grads, record_grad)
            total_loss = P.TensorAdd()(total_loss, loss)
        loss = P.Div()(total_loss, self._micro_float)
        beta = self._div(beta, self._micro_batches)

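        # differential-privacy noise: perturb each accumulated gradient, then
        # average over micro-batches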
        if self._noise_mech is not None:
            grad_noise_tuple = ()
            for grad_item in grads:
                grad_noise = self._noise_mech(grad_item)
                grad_noise_tuple = grad_noise_tuple + (grad_noise, )
            grads = self._tuple_add(grads, grad_noise_tuple)
            grads = self._hyper_map(F.partial(_grad_scale, self._micro_float),
                                    grads)
            # update mech parameters

            if self._noise_mech_param_updater is not None:
                multiplier = self._noise_mech_param_updater()
                loss = F.depend(loss, multiplier)

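        # scale gradients back down by the loss-scale factor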
        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
        # apply grad reducer on grads
        grads = self.grad_reducer(grads)
        # get the overflow buffer
        if not self.gpu_target:
            self.get_status(init)
            # sum overflow buffer elements; 0: no overflow, >0: overflow
            flag_sum = self.reduce_sum(init, (0, ))
        else:
            flag_sum = self.hyper_map(F.partial(_grad_overflow), grads)
            flag_sum = self.addn(flag_sum)
            # convert flag_sum to scalar
            flag_sum = self.reshape(flag_sum, ())
        if self.is_distributed:
            # sum overflow flag over devices
            flag_reduce = self.allreduce(flag_sum)
            cond = self.less_equal(self.base, flag_reduce)
        else:
            cond = self.less_equal(self.base, flag_sum)
        overflow = cond
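        # with an internal loss-scale manager, let it judge the overflow flag
        # and adjust the loss scale accordingly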
        if sens is None:
            overflow = self.loss_scaling_manager(self.loss_scale, cond)
        # if there is no overflow, do optimize
        if overflow:
            opt = False
        else:
            opt = self.optimizer(grads)
        ret = (loss, cond, scaling_sens)

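        # adaptive clipping: update the norm bound from the fraction of
        # micro-batches whose gradient norm fell below it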
        if self._clip_mech is not None:
            next_norm_bound = self._clip_mech(beta, self._norm_bound)
            P.assign(self._norm_bound, next_norm_bound)
        return F.depend(ret, opt)
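For orientation, the per-micro-batch DP logic above (global-norm bookkeeping, clipping, noising, averaging) reduces to the following plain-NumPy sketch. The names dp_microbatch_step, norm_bound and noise_sigma are hypothetical, and loss scaling plus overflow handling are omitted:

import numpy as np

def dp_microbatch_step(grads_per_mb, norm_bound, noise_sigma, rng):
    """grads_per_mb: list over micro-batches of per-parameter gradient arrays."""
    summed = [np.zeros_like(g) for g in grads_per_mb[0]]
    under_bound = 0.0
    for mb_grads in grads_per_mb:
        # global L2 norm across all parameters of this micro-batch
        norm = np.sqrt(sum(np.sum(g ** 2) for g in mb_grads))
        under_bound += float(norm < norm_bound)        # the beta accumulator
        scale = min(1.0, norm_bound / (norm + 1e-12))  # clip by global norm
        summed = [s + g * scale for s, g in zip(summed, mb_grads)]
    n = len(grads_per_mb)
    # perturb the accumulated gradients with Gaussian noise, then average
    noisy = [(s + rng.normal(0.0, noise_sigma, s.shape)) / n for s in summed]
    return noisy, under_bound / n  # beta: fraction of norms under the bound

rng = np.random.default_rng(0)
grads = [[rng.normal(size=(3,)), rng.normal(size=(2, 2))] for _ in range(4)]
noisy_grads, beta = dp_microbatch_step(grads, norm_bound=1.0,
                                       noise_sigma=0.1, rng=rng)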