Example #1
 def backward(self, top, propagate_down, bottom):
     with pu.caffe_cuda_context():
         h = caffe.cublas_handle()
         import scikits.cuda.linalg as linalg
         top_diff = top[0].diff_as_pycuda_gpuarray()
         ts = [self.t1_, self.t2_]
         for i in xrange(len(bottom)):
             if not propagate_down[i]:
                 continue
             diff = bottom[i].diff_as_pycuda_gpuarray()
             data = bottom[(i + 1) % 2].data_as_pycuda_gpuarray()
             # The three transpose conditions below are tricky; see the
             # NumPy sketch after this example.
             swap = ts[i] ^ bool(i)
             t1 = ts[i]
             t2 = (not t1) ^ ts[(i + 1) % 2]
             for b in xrange(bottom[0].shape[0]):
                 x = top_diff[b]
                 y = data[b]
                 t1_, t2_ = t1, t2
                 if swap:
                     x, y = y, x
                     t1_, t2_ = t2_, t1_
                 linalg.dot(x,
                            y,
                            transa=blas_trans(t1_),
                            transb=blas_trans(t2_),
                            handle=h,
                            out=diff[b])
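The swap/t1/t2 bookkeeping above encodes the standard gradient identities for a matrix product with optional transposes. A minimal NumPy sketch of the same rules (an illustration, not part of the layer code):

    import numpy as np

    def matmul_grads(a, b, g, t1, t2):
        # Gradients of c = op(a).dot(op(b)) w.r.t. a and b, where op
        # transposes its argument when the flag is set and g = dL/dc.
        da = g.dot((b.T if t2 else b).T)   # gradient w.r.t. op(a)
        db = (a.T if t1 else a).T.dot(g)   # gradient w.r.t. op(b)
        if t1:
            da = da.T                      # undo the transpose on a
        if t2:
            db = db.T                      # undo the transpose on b
        return da, db

    # Shape check for c = a.dot(b.T), i.e. t1=False, t2=True:
    a, b, g = np.random.randn(3, 4), np.random.randn(5, 4), np.random.randn(3, 5)
    da, db = matmul_grads(a, b, g, False, True)
    assert da.shape == a.shape and db.shape == b.shape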
Example #2
 def forward(self, bottom, top):
     with pu.caffe_cuda_context():
         h = caffe.cublas_handle()
         import scikits.cuda.linalg as linalg
         mat1 = bottom[0].data_as_pycuda_gpuarray()
         mat2 = bottom[1].data_as_pycuda_gpuarray()
         mato = top[0].data_as_pycuda_gpuarray()
         for b in xrange(bottom[0].shape[0]):
             linalg.dot(mat1[b],
                        mat2[b],
                        transa=blas_trans(self.t1_),
                        transb=blas_trans(self.t2_),
                        handle=h,
                        out=mato[b])
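blas_trans is not defined in these snippets. Assuming it simply maps the layer's boolean transpose flags to the 'N'/'T' characters that scikits.cuda.linalg.dot expects for transa/transb, a plausible definition would be:

    def blas_trans(t):
        # Hypothetical helper: boolean flag -> cuBLAS-style transpose char.
        return 'T' if t else 'N'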
Example #3
 def backward(self, top, propagate_down, bottom):
     """
     Compute @f$\frac{\partial {\cal L}}{\partial y_bi}=\frac{\partial {\cal L}}{\partial d_i} \frac{\partial d_i} {\partial y_bi}@f$.
     @f$\frac{\partial {\cal L}}{\partial d_i}=\frac{2}{n}d_i' \left(d_i - \frac{\lambda}{n}\sum_j d_j\right).
     """
     with pu.caffe_cuda_context():
         pred = bottom[0].data_as_pycuda_gpuarray()
         label = bottom[1].data_as_pycuda_gpuarray()
         for i in xrange(len(bottom)):
             if propagate_down[i]:
                 diff = bottom[i].diff_as_pycuda_gpuarray()
                 sgn = 1 if i == 0 else -1
                 self.k_backward_(pred, label, self.diff_sum_,
                                  self.mask_sum_, sgn, top[0].diff, diff)
                 if self.clip_gradient_ is not None:
                     self.k_clip_gradient(diff)
Example #4
    def reshape(self, bottom, top):
        with pu.caffe_cuda_context():

            batch_size = bottom[0].shape[0]
            if self.batch_size_ != batch_size:
                self.batch_size_ = batch_size
                self.diff_sum_ = gpuarray.zeros((batch_size, 1), dtype)
                self.diff2_sum_ = gpuarray.zeros((batch_size, 1), dtype)
                self.mask_sum_ = gpuarray.zeros((batch_size, 1), dtype)
            dim = int(np.prod(bottom[0].shape[1:]))
            if self.dim_ != dim:
                self.dim_ = dim
                self.multipier_sum_ = gpuarray.zeros((dim, 1), dtype)
                self.multipier_sum_.fill(dtype(1.0))
            #if self.img_shape_ != bottom[0].shape:
            # Define mask to be 1.0 as we don't need thresholding
            #self.mask_ = gpuarray.zeros(bottom[0].shape, dtype)
            #self.mask_.fill(dtype(1.0))
            #self.img_shape_ = bottom[0].shape
        top[0].reshape()
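The multipier_sum_ buffer allocated above is a (dim, 1) column of ones; multiplying a (batch_size, dim) matrix by it via linalg.dot reduces each row to its per-sample sum in a single cuBLAS call. The same trick in NumPy (illustration only):

    import numpy as np

    X = np.arange(6, dtype=np.float32).reshape(2, 3)
    ones = np.ones((3, 1), dtype=np.float32)   # plays the role of multipier_sum_
    assert np.allclose(X.dot(ones), X.sum(axis=1, keepdims=True))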
Example #5
    def forward(self, bottom, top):
        """

        """
        with pu.caffe_cuda_context():
            h = caffe.cublas_handle()
            batch_size = bottom[0].shape[0]
            dim = bottom[0].count / bottom[0].shape[0]
            pred = bottom[0].data_as_pycuda_gpuarray()
            label = bottom[1].data_as_pycuda_gpuarray()
            # Use bottom[0].diff and bottom[1].diff as temporary buffers
            diff = bottom[0].diff_as_pycuda_gpuarray()
            diff2 = bottom[1].diff_as_pycuda_gpuarray()
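            # NOTE: mask aliases bottom[0].diff (same underlying buffer);
            # it is only filled with ones after the dot products below
            # have consumed diff.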
            mask = bottom[0].diff_as_pycuda_gpuarray()
            # Compute diff
            self.k_masked_diff_(diff, pred, label)
            self.k_squared_(diff, diff2)
            import scikits.cuda.linalg as linalg
            # This needs scikits.cuda 0.5.0a3 or later
            # (sudo) pip install scikits.cuda>=0.5.0a3
            linalg.dot(diff.reshape(batch_size, dim),
                       self.multipier_sum_,
                       handle=h,
                       out=self.diff_sum_)
            linalg.dot(diff2.reshape(batch_size, dim),
                       self.multipier_sum_,
                       handle=h,
                       out=self.diff2_sum_)
            mask.fill(dtype(1.0))
            linalg.dot(mask.reshape(batch_size, dim),
                       self.multipier_sum_,
                       handle=h,
                       out=self.mask_sum_)
            self.k_ensure_mask_sum_(self.mask_sum_)
            term1 = self.k_div_sum_(self.diff2_sum_, self.mask_sum_)
            term2 = self.k_div_squared_sum_(self.diff_sum_, self.mask_sum_)
            top[0].data[...] = (term1.get() - self.lambda_ * term2.get()) \
                / batch_size
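For reference, a NumPy transcription of this forward pass, assuming the mask is all ones so that mask_sum equals the per-sample element count n (illustration only, not the layer's code):

    import numpy as np

    def scale_invariant_loss(pred, label, lam):
        batch_size = pred.shape[0]
        d = (pred - label).reshape(batch_size, -1)
        n = d.shape[1]
        term1 = (d ** 2).sum(axis=1) / n        # diff2_sum / mask_sum
        term2 = (d.sum(axis=1) / n) ** 2        # (diff_sum / mask_sum)^2
        return (term1 - lam * term2).sum() / batch_size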
Example #6
    def setup(self, bottom, top):
        assert len(bottom) == 2
        assert len(top) == 1
        # parameter
        param = eval(self.param_str)
        self.lambda_ = param['lambda']
        self.clip_gradient_ = param.get('clip_gradient', None)
        # Create CUDA function
        with pu.caffe_cuda_context():
            self.k_masked_diff_ = ElementwiseKernel(
                "float *diff, float *pred, float *label",
                "diff[i] = (pred[i] - label[i])", 'masked_diff')
            self.k_squared_ = ElementwiseKernel(
                "float *diff, float *diff2", "diff2[i] = diff[i] * diff[i]",
                'squared')
            self.k_ensure_mask_sum_ = ElementwiseKernel(
                "float *mask_sum", "mask_sum[i] = max(mask_sum[i], 1.0f)",
                'ensure_mask_sum')
            if self.clip_gradient_ is not None:
                self.k_clip_gradient = ElementwiseKernel(
                    "float *diff",
                    "diff[i] = fmaxf(-{0}, fminf(diff[i], {0}))".format(
                        self.clip_gradient_), 'clip_gradient')
            # This could be computed faster with cublasSdot
            self.k_sum_ = ReductionKernel(dtype,
                                          neutral="0",
                                          reduce_expr="a+b",
                                          map_expr="d[i]",
                                          arguments="float *d")
            self.k_squred_sum_ = ReductionKernel(dtype,
                                                 neutral="0",
                                                 reduce_expr="a+b",
                                                 map_expr="d[i] * d[i]",
                                                 arguments="float *d")
            self.k_div_sum_ = ReductionKernel(dtype,
                                              neutral="0",
                                              reduce_expr="a+b",
                                              map_expr="d[i] / m[i]",
                                              arguments="float *d, float *m")
            self.k_div_squared_sum_ = ReductionKernel(
                dtype,
                neutral="0",
                reduce_expr="a+b",
                map_expr="d[i] * d[i] / (m[i] * m[i])",
                arguments="float *d, float *m")
            func_backward = SourceModule(
                """
#include <caffe/util/device_alternate.hpp>
__global__ void backward(float *pred, float *label,
  float *diff_sum, float *mask_sum, int count, int stride, int sgn,
  int batch_size, float lambda, float loss_weight, float *diff) {
  CUDA_KERNEL_LOOP(i, count) {
    diff[i] = loss_weight * 2.0f * sgn / mask_sum[i / stride]
         / batch_size * ((pred[i] - label[i])
            - lambda / mask_sum[i / stride] * diff_sum[i / stride]);
  }
}
""",
                include_dirs=pu.caffe_include_dirs).get_function("backward")
            func_backward.prepare("PPPPiiiiffP")

            def _func_backward(pred, label, ds, ms, sgn, loss_weight, diff):
                bg = pu.block_and_grid(pred.size)
                batch_size = pred.shape[0]
                count = pred.size
                stride = pred.size / pred.shape[0]
                func_backward.prepared_call(bg['grid'], bg['block'],
                                            pred.gpudata, label.gpudata,
                                            ds.gpudata, ms.gpudata, count,
                                            stride, sgn, batch_size,
                                            self.lambda_, loss_weight,
                                            diff.gpudata)

            self.k_backward_ = _func_backward
        self.batch_size_ = 0
        self.dim_ = 0
        #self.img_shape_ = None
        self.reshape(bottom, top)
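A NumPy transcription of the CUDA backward kernel above, under the same all-ones-mask assumption (mask_sum == n per sample); useful as a reference when checking the gradient numerically:

    import numpy as np

    def backward_ref(pred, label, lam, loss_weight=1.0, sgn=1):
        batch_size = pred.shape[0]
        d = (pred - label).reshape(batch_size, -1)
        n = d.shape[1]
        dsum = d.sum(axis=1, keepdims=True)     # diff_sum per sample
        return (loss_weight * 2.0 * sgn / (n * batch_size)
                * (d - lam / n * dsum)).reshape(pred.shape)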
Example #7
 def forward(self, bottom, top):
     with pu.caffe_cuda_context():
         self.k_log_(bottom[0].data_as_pycuda_gpuarray(),
                     top[0].data_as_pycuda_gpuarray(),
                     np.float32(self.offset_))
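k_log_ is created elsewhere in this layer's setup. Judging by the call signature, a plausible (hypothetical) definition is an ElementwiseKernel computing a shifted logarithm:

    from pycuda.elementwise import ElementwiseKernel

    # Hypothetical definition of k_log_, consistent with the call above:
    k_log_ = ElementwiseKernel(
        "float *bottom, float *top, float offset",
        "top[i] = logf(bottom[i] + offset)",
        'log_forward')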