Example #1
0
 def solve(self, wt_n, y_nd, bend_coef, rot_coef, f_res):
     """Solve the regularized TPS fitting problem for targets y_nd and
     write the resulting thin-plate-spline parameters into f_res.

     Parameters
     ----------
     wt_n : per-point weights, shape (n,)
     y_nd : target points, shape (n, d) — must match self.x_nd's dimensions
     bend_coef : bending regularization coefficient; must be one of the
         pre-computed self.bend_coefs
     rot_coef : rotation regularization coefficient; must match the value
         this solver was built with (self.rot_coef)
     f_res : result object updated in place via set_ThinPlateSpline

     Raises
     ------
     RuntimeError : on any invalid input or solver state.
     """
     # Validate with explicit exceptions instead of asserts: asserts are
     # stripped under `python -O`, and the sibling solvers in this file
     # already raise RuntimeError for the same checks.
     if y_nd.shape != (self.n, self.d):
         raise RuntimeError(
             "The dimensions of y_nd doesn't match the dimensions of x_nd")
     if bend_coef not in self.bend_coefs:
         raise RuntimeError("bend_coef is not one of the pre-computed bend coefficients")
     if not np.allclose(rot_coef, self.rot_coef):
         raise RuntimeError("rot_coef doesn't match the rot_coef this solver was built with")
     if not self.valid:
         raise RuntimeError("Solver state is not valid")
     self.initialize_solver(bend_coef, wt_n)
     # Accumulate QN^T * (W QN) into NHN on the GPU (beta=1 adds to the
     # existing NHN contents), forming the left-hand-side matrix.
     gemm(self.QN_gpu, self.WQN_gpu, self.NHN_gpu,
          transa='T', alpha=1, beta=1)
     lhs = self.NHN_gpu.get()
     # Weighted targets W y, then rhs = NR + QN^T (W y) on the CPU.
     wy_nd = wt_n[:, None] * y_nd
     rhs = self.NR + self.QN.T.dot(wy_nd)
     z = scipy.linalg.solve(lhs, rhs)
     # Map the reduced solution back through the null-space basis N.
     theta = self.N.dot(z)
     set_ThinPlateSpline(f_res, self.x_nd, theta)
Example #2
0
    def solve(self, wt_n, y_nd, bend_coef, f_res):
        """Solve the weighted, regularized TPS fit for targets y_nd on the
        GPU and store the result in f_res via f_res.update.

        Parameters
        ----------
        wt_n : per-point weights, shape (n,) — assumed; TODO confirm against caller
        y_nd : target points, shape (n, d), must be C-contiguous
        bend_coef : bending regularization coefficient (scales NKN in the lhs)
        f_res : result object; updated in place with the solved theta

        Raises
        ------
        RuntimeError : if y_nd has the wrong shape or is not C-contiguous.
        """
        if y_nd.shape[0] != self.n or y_nd.shape[1] != self.d:
            raise RuntimeError(
                "The dimensions of y_nd doesn't match the dimensions of x_nd")
        if not y_nd.flags.c_contiguous:
            raise RuntimeError("Expected y_nd to be c-contiguous but it isn't")
        # Upload sqrt(W) * QN so that sqrtWQN^T . sqrtWQN == QN^T W QN below.
        self.sqrtWQN_gpu.set_async(np.sqrt(wt_n)[:, None] * self.QN)
        # lhs = bend_coef * NKN + NRN ...
        geam(self.NKN_gpu, self.NRN_gpu, self.lhs_gpu, alpha=bend_coef, beta=1)
        # ... then accumulate QN^T W QN into lhs (beta=1 adds in place).
        gemm(self.sqrtWQN_gpu,
             self.sqrtWQN_gpu,
             self.lhs_gpu,
             transa='T',
             alpha=1,
             beta=1)

        # Initialize rhs with NR (device-to-device copy), then add QN^T (W y).
        drv.memcpy_dtod_async(self.rhs_gpu.gpudata, self.NR_gpu.gpudata,
                              self.rhs_gpu.nbytes)
        self.y_dnW_gpu.set_async(
            y_nd.T * wt_n)  # use transpose so that it is f_contiguous
        gemm(self.QN_gpu,
             self.y_dnW_gpu,
             self.rhs_gpu,
             transa='T',
             transb='T',
             alpha=1,
             beta=1)

        if lfd.registration._has_cula:
            # Solve lhs z = rhs in place on the GPU (Cholesky), then theta = N z.
            culinalg.cho_solve(self.lhs_gpu, self.rhs_gpu)
            z = self.rhs_gpu.get()
            culinalg.dot(self.N_gpu, self.rhs_gpu, out=self.theta_gpu)
            theta = self.theta_gpu.get()
        else:  # if cula is not installed perform the last two computations on the CPU
            z = np.linalg.solve(self.lhs_gpu.get(), self.rhs_gpu.get())
            theta = self.N.dot(z)
        f_res.update(self.x_nd,
                     y_nd,
                     bend_coef,
                     self.rot_coef,
                     wt_n,
                     theta,
                     N=self.N,
                     z=z)
Example #3
0
    def solve(self, wt_n, y_nd, bend_coef, f_res):
        """Solve the weighted, regularized TPS fit for targets y_nd on the
        GPU and store the result in f_res via f_res.set_ThinPlateSpline.

        Parameters
        ----------
        wt_n : per-point weights, shape (n,) — assumed; TODO confirm against caller
        y_nd : target points, shape (n, d), must be C-contiguous
        bend_coef : bending regularization coefficient (scales NKN in the lhs)
        f_res : result object; updated in place with the solved theta

        Raises
        ------
        RuntimeError : if y_nd has the wrong shape or is not C-contiguous.
        """
        if y_nd.shape[0] != self.n or y_nd.shape[1] != self.d:
            raise RuntimeError("The dimensions of y_nd doesn't match the dimensions of x_nd")
        if not y_nd.flags.c_contiguous:
            raise RuntimeError("Expected y_nd to be c-contiguous but it isn't")
        # Upload sqrt(W) * QN so that sqrtWQN^T . sqrtWQN == QN^T W QN below.
        self.sqrtWQN_gpu.set_async(np.sqrt(wt_n)[:,None] * self.QN)
        # lhs = bend_coef * NKN + NRN, then accumulate QN^T W QN (beta=1 adds in place).
        geam(self.NKN_gpu, self.NRN_gpu, self.lhs_gpu, alpha=bend_coef, beta=1)
        gemm(self.sqrtWQN_gpu, self.sqrtWQN_gpu, self.lhs_gpu, transa='T', alpha=1, beta=1)

        # Initialize rhs with NR (device-to-device copy), then add QN^T (W y).
        drv.memcpy_dtod_async(self.rhs_gpu.gpudata, self.NR_gpu.gpudata, self.rhs_gpu.nbytes)
        self.y_dnW_gpu.set_async(y_nd.T * wt_n) # use transpose so that it is f_contiguous
        gemm(self.QN_gpu, self.y_dnW_gpu, self.rhs_gpu, transa='T', transb='T', alpha=1, beta=1)
        
        if lfd.registration._has_cula:
            # Solve lhs z = rhs in place on the GPU (Cholesky), then theta = N z.
            culinalg.cho_solve(self.lhs_gpu, self.rhs_gpu)
            culinalg.dot(self.N_gpu, self.rhs_gpu, out=self.theta_gpu)
            theta = self.theta_gpu.get()
        else: # if cula is not installed perform the last two computations on the CPU
            z = np.linalg.solve(self.lhs_gpu.get(), self.rhs_gpu.get())
            theta = self.N.dot(z)
        f_res.set_ThinPlateSpline(self.x_nd, y_nd, bend_coef, self.rot_coef, wt_n, theta=theta)