def get(self, tensor=None):
    """
    Copy the device tensor to a numpy array.

    Arguments:
        tensor (np.ndarray): Optional output array. When provided it is
            filled in place and returned; when omitted (or None) a newly
            created array is returned instead.

    Returns:
        Numpy array containing tensor data
    """
    device_tensor = self.tensor

    if np.sum(device_tensor.strides) == 0:
        # Tensor is just a broadcasted scalar: read the single stored value
        # from the start of the device buffer and replicate it on the host.
        view = GPUArray((1, ), dtype=device_tensor.dtype,
                        gpudata=device_tensor.gpudata)[0]
        value = view.get()

        if tensor is None:
            return np.full(device_tensor.shape, value, dtype=device_tensor.dtype)

        tensor.fill(value)
        return tensor

    # Contiguous tensors (and tensors holding a single element) can be copied
    # straight from their own device buffer.
    directly_copyable = (
        self.is_contiguous
        or device_tensor.shape == ()
        or np.prod(device_tensor.shape) == 1
    )
    if directly_copyable:
        contig_tensor = device_tensor
    else:
        # Need to do memcpy from contiguous device memory
        contig_tensor = self.as_contiguous()

    if tensor is None:
        return contig_tensor.get()

    tensor[:] = contig_tensor.get()
    return tensor
ky = ky.astype(dtype)
fk = fk.astype(complex_dtype)

# Allocate memory for the nonuniform coefficients on the GPU.
c_gpu = GPUArray((n_transf, M), dtype=complex_dtype)

# Initialize the plan and set the points.
plan = cufinufft(2, (N1, N2), n_transf, eps=eps, dtype=dtype)
plan.set_pts(to_gpu(kx), to_gpu(ky))

# Execute the plan, reading from the uniform grid fk and storing the result
# in c_gpu.
plan.execute(c_gpu, to_gpu(fk))

# Retrieve the result from the GPU.
c = c_gpu.get()

# Check accuracy of the transform at index jt.
jt = M // 2

# The integer grid and the complex exponentials evaluated at point jt do not
# depend on the transform index, so they are built once outside the loop.
x, y = np.mgrid[-(N1 // 2):(N1 + 1) // 2, -(N2 // 2):(N2 + 1) // 2]
phase_jt = np.exp(-1j * (x * kx[jt] + y * ky[jt]))

for i in range(n_transf):
    # Calculate the true value of the type 2 transform at the index jt.
    c_true = np.sum(fk[i] * phase_jt)

    # Calculate the absolute and relative error.
    err = np.abs(c[i, jt] - c_true)
    rel_err = err / np.max(np.abs(c[i]))

    print(f"[{i}] Absolute error on point [{jt}] is {err:.3g}")
    print(f"[{i}] Relative error on point [{jt}] is {rel_err:.3g}")
ky = ky.astype(dtype)
c = c.astype(complex_dtype)

# Allocate memory for the uniform grid on the GPU.
fk_gpu = GPUArray((n_transf, N1, N2), dtype=complex_dtype)

# Initialize the plan and set the points.
plan = cufinufft(1, (N1, N2), n_transf, eps=eps, dtype=dtype)
plan.set_pts(to_gpu(kx), to_gpu(ky))

# Execute the plan, reading from the strengths array c and storing the
# result in fk_gpu.
plan.execute(to_gpu(c), fk_gpu)

# Retrieve the result from the GPU.
fk = fk_gpu.get()

# Check accuracy of the transform at position (nt1, nt2).
nt1 = int(0.37 * N1)
nt2 = int(0.26 * N2)

# The uniform grid point (nt1, nt2) corresponds to the coordinate
# nt1 - N1 // 2 and nt2 - N2 // 2. Neither the coordinate nor the complex
# exponentials evaluated there depend on the transform index, so both are
# computed once outside the loop.
x, y = nt1 - N1 // 2, nt2 - N2 // 2
phase_nt = np.exp(1j * (x * kx + y * ky))

for i in range(n_transf):
    # Calculate the true value of the type 1 transform at the uniform grid
    # point (nt1, nt2).
    fk_true = np.sum(c[i] * phase_nt)

    # Calculate the absolute and relative error.
    err = np.abs(fk[i, nt1, nt2] - fk_true)
    rel_err = err / np.max(np.abs(fk[i]))