Example #1
File: opt.py  Project: Ravineel/singa
 def backward_and_update_half(self, loss, threshold=2097152, clipping=False, clip_Value=100):
     # THIS IS AN EXPERIMENTAL FUNCTION FOR RESEARCH PURPOSES:
     # It converts the gradients to 16-bit half-precision format before the allreduce.
     # To assist training, this function provides an option to perform gradient clipping.
     plist = []
     acc = 0
     glist = []
     for p, g in autograd.backward(loss):
         if clipping:
             g = autograd.clip(g, -clip_Value, clip_Value)
         if g.size() > threshold:
             # larger than threshold -> reduced directly
             self.all_reduce_half(g.data)
         else:
             # smaller than threshold -> accumulate
             glist.append(g.data)
             acc += g.size()
             if acc > threshold:
                 self.fused_all_reduce_half(glist)
                 acc = 0
                 glist = []
         plist.append((p, g))
     if glist:
         self.fused_all_reduce_half(glist)
     self.wait()
     for p, g in plist:
         self.update(p, g)
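The bucketing logic above is easier to see in isolation. The following standalone sketch (plain Python with a hypothetical helper name, not SINGA code) reproduces the same control flow: gradients larger than the threshold are reduced individually, while smaller ones accumulate in a bucket that is flushed as one fused all-reduce once it exceeds the threshold.

# Standalone sketch of the fusion strategy above; bucket_gradients is a
# hypothetical name, and plain integer sizes stand in for SINGA tensors.
def bucket_gradients(grad_sizes, threshold=2097152):
    batches = []          # each entry is one all-reduce call (a list of sizes)
    bucket, acc = [], 0
    for size in grad_sizes:
        if size > threshold:
            batches.append([size])      # larger than threshold -> send directly
        else:
            bucket.append(size)         # smaller than threshold -> accumulate
            acc += size
            if acc > threshold:
                batches.append(bucket)  # bucket exceeded threshold -> flush fused
                bucket, acc = [], 0
    if bucket:
        batches.append(bucket)          # flush whatever is left at the end
    return batches

print(bucket_gradients([4_000_000, 500_000, 900_000, 800_000, 100_000]))
# [[4000000], [500000, 900000, 800000], [100000]]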
Example #2
    def backward_and_update_half(self,
                                 loss,
                                 threshold=2097152,
                                 clipping=False,
                                 clip_Value=100):
        """Performs backward propagation and parameter update, with FP16 precision communication.

        THIS IS A EXPERIMENTAL FUNCTION FOR RESEARCH PURPOSE:
        From the loss, it performs backward propagation to get the gradients and do the parameter
        update. For gradient communication, it fuses all the tensor smaller than the threshold value
        to reduce network latency, as well as converting them to FP16 half precision format before
        sending them out. To assist training, this functions provide an option to perform gradient
        clipping.

        Args:
                loss(Tensor): loss is the objective function of the deep learning model
                optimization, e.g. for classification problem it can be the output of the
                softmax_cross_entropy function.
                threshold(int): threshold is a parameter to control performance in fusing
                the tensors. For the tensors of sizes smaller than threshold, they are to
                be accumulated and fused before the all reduce operation. For the tensors
                of its size larger than the threshold value, they are to be reduced directly
                without fusion.
                clipping(bool): a boolean flag to choose whether to clip the gradient value
                clip_value(float): the clip value to be used when clipping is True
        """
        plist = []
        acc = 0
        glist = []
        for p, g in autograd.backward(loss):
            assert p.dtype == tensor.float32, (
                'This function is only available for input tensors of 32-bit precision, '
                'which are converted into 16 bits before transmission')
            if clipping:
                g = autograd.clip(g, -clip_Value, clip_Value)
            if g.size() > threshold:
                # larger than threshold -> reduced directly
                self.all_reduce_half(g.data)
            else:
                # smaller than threshold -> accumulate
                glist.append(g.data)
                # send=False copies this tensor into the fusion buffer
                # without starting the transfer yet
                self.fused_all_reduce_half([g.data], send=False)
                acc += g.size()
                if acc > threshold:
                    self.fused_all_reduce_half(glist)
                    acc = 0
                    glist = []
            plist.append((p, g))
        if glist:
            self.fused_all_reduce_half(glist)
        self.wait()
        for p, g in plist:
            self.update(p, g)
        self.opt.step()
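Since the gradients are cast to FP16 only for communication, the FP32 to FP16 and back round trip loses some precision; clipping before the cast also guards against FP16 overflow, as half precision saturates above 65504. A small NumPy illustration of the quantization error (NumPy is used here purely for demonstration; in the method above the conversion happens inside all_reduce_half and fused_all_reduce_half):

import numpy as np

# Simulate the FP32 -> FP16 -> FP32 round trip implied by half-precision
# communication and measure the resulting quantization error.
rng = np.random.default_rng(0)
g = rng.normal(scale=1e-3, size=1000).astype(np.float32)  # stand-in gradients
g_half = g.astype(np.float16).astype(np.float32)          # round trip
print('max abs error:', np.abs(g - g_half).max())
print('max rel error:', (np.abs(g - g_half) / np.abs(g)).max())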
Example #3
    def test_clip(self):
        x = np.array([-0.9, -0.3, -0.1, 0.1, 0.5,
                      0.9]).reshape(3, 2).astype(np.float32)

        x = tensor.from_numpy(x)
        min = -0.5
        max = 0.5
        x.to_device(gpu_dev)

        y = autograd.clip(x, min, max)

        # frontend
        model = sonnx.to_onnx([x, min, max], [y])
        # print('The model is:\n{}'.format(model))

        # backend
        sg_ir = sonnx.prepare(model, device=gpu_dev)
        y_t = sg_ir.run([x, min, max])

        np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                             tensor.to_numpy(y_t[0]),
                                             decimal=5)
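For reference, autograd.clip is expected to follow the same semantics as NumPy's np.clip (and the ONNX Clip operator), so the test's expected output can be checked with plain NumPy, no GPU required:

import numpy as np

# Reference computation for the test above: each element is limited to the
# [min, max] interval, matching the expected autograd.clip output.
x = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9]).reshape(3, 2).astype(np.float32)
print(np.clip(x, -0.5, 0.5))
# [[-0.5 -0.3]
#  [-0.1  0.1]
#  [ 0.5  0.5]]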