def _impl_test_argminmax(self, dtype):
    """Check GPU argmax/argmin against NumPy along both axes of a 2-D array,
    for both C- and F-ordered copies of the same random data."""
    base = np.random.normal(scale=5.0, size=(3, 5))
    tol = {'rtol': dtype_to_rtol[dtype], 'atol': dtype_to_atol[dtype]}
    for order in ('C', 'F'):
        x = base.astype(dtype=dtype, order=order)
        x_gpu = gpuarray.to_gpu(x)
        # Same check order as before: argmax axis 0/1, then argmin axis 0/1.
        for gpu_fn, cpu_name in ((misc.argmax, 'argmax'), (misc.argmin, 'argmin')):
            for axis in (0, 1):
                expected = getattr(x, cpu_name)(axis=axis)
                assert_allclose(gpu_fn(x_gpu, axis=axis).get(), expected, **tol)
def _impl_test_argminmax(self, dtype):
    """Verify that GPU argmax/argmin match NumPy's results on a small 2-D
    array, in both C and Fortran memory order."""
    base = np.random.normal(scale=5.0, size=(3, 5))
    for order in ('C', 'F'):
        x = base.astype(dtype=dtype, order=order)
        x_gpu = gpuarray.to_gpu(x)
        # argmax on both axes, then argmin on both axes (same order as before).
        for axis in (0, 1):
            assert np.allclose(misc.argmax(x_gpu, axis=axis).get(),
                               x.argmax(axis=axis))
        for axis in (0, 1):
            assert np.allclose(misc.argmin(x_gpu, axis=axis).get(),
                               x.argmin(axis=axis))
def argmax(self, dim):
    """Return a Tensor of int32 indices of the maxima along dimension *dim*,
    computed on the GPU when this tensor lives on 'cuda', else with NumPy."""
    if self.device == 'cuda':
        indices = misc.argmax(self.data, axis=dim, keepdims=False)
        return Tensor(data=indices, device=self.device, d_type=np.int32)
    return Tensor(
        data=np.argmax(self.data, axis=dim),
        device=self.device,
        d_type=np.int32,
    )
def fitSlcGPU(slc, srcFatT2, t2, b1, ff):
    """Fit per-voxel parameters for one slice of ``dicomStack`` on the GPU.

    Each row chunk of the slice is squared element-wise, correlated against
    the precomputed signal matrix ``signalsGPU`` (presumably one column per
    entry of ``parameterCombinations`` — TODO confirm against the builder of
    ``signalsGPU``), and the best-matching combination is scattered into the
    output maps.

    Parameters
    ----------
    slc : int
        Slice index into the global ``dicomStack`` and into the output maps.
    srcFatT2 :
        Unused in this function; kept for call-site compatibility.
    t2, b1, ff : ndarray
        Output parameter maps, written in place at ``[:, :, slc]``. ``ff`` is
        also read: any positive value in this slice enables the FF-aware
        maximum search via ``findmax_ff``.

    Side effects: mutates the global ``ROWSTEP`` (shrunk on GPU out-of-memory
    and retried), writes the three maps in place, and calls ``plotImages()``
    when ``DOPLOT >= 1``.

    Raises
    ------
    RuntimeError
        If the GPU cannot fit even a single-row chunk (ROWSTEP shrinks below 1).
    """
    global ROWSTEP
    print("Fitting slice", slc)
    yValues = dicomStack[:, :, slc, :].squeeze()
    slcShape = yValues.shape
    nrows = slcShape[0]
    ncols = slcShape[1]
    sigLen = slcShape[2]
    success = False
    ffParams_gpu = None
    ffValues_gpu = None
    # Fix: pre-initialize the GPU handles so the MemoryError handler below can
    # always free them. The original code raised NameError (hiding the retry)
    # when the allocation failed before corrMatrixGPU was first assigned.
    slcGPU = None
    corrMatrixGPU = None
    if np.any(ff[:, :, slc] > 0):
        useFF = True
        ffParams_gpu = findmax_ff.prepareAndLoadParams(parameterCombinations)
    else:
        useFF = False
    while not success:
        # Fix: ROWSTEP is decremented on every out-of-memory retry; once it
        # reaches 0, range(0, nrows, 0) would raise an uncaught ValueError.
        # Fail with a clear message instead.
        if ROWSTEP < 1:
            raise RuntimeError("Not enough GPU memory even for ROWSTEP=1")
        try:
            for r in range(0, nrows, ROWSTEP):
                rowMax = min(r + ROWSTEP, nrows)
                # Flatten the chunk to (voxels, signal length) for the GPU.
                slcLin = yValues[r:rowMax, :, :].reshape(ncols*(rowMax-r), sigLen).astype(np.float32)
                slcGPU = None  # drop the previous chunk before allocating anew
                slcGPU = pycuda.gpuarray.to_gpu(slcLin)
                slcGPU = sklinalg.multiply(slcGPU, slcGPU)  # element-wise square
                corrMatrixGPU = sklinalg.mdot(slcGPU, signalsGPU)  # correlation
                tryFree(slcGPU)
                if useFF:
                    # FF-constrained search: restrict the argmax per voxel.
                    ffValues_gpu = findmax_ff.prepareAndLoadFF(ff[r:rowMax, :, slc])
                    corrMax = findmax_ff.findmax_gpu(corrMatrixGPU, ffValues_gpu, ffParams_gpu)
                else:
                    # Unconstrained: best correlation per voxel row.
                    corrMaxGPU = skmisc.argmax(corrMatrixGPU, 1)
                    corrMax = corrMaxGPU.get()
                    tryFree(corrMaxGPU)
                tryFree(corrMatrixGPU)
                tryFree(ffValues_gpu)
                # Scatter the winning combination back into the output maps.
                for row in range(r, rowMax):
                    for c in range(ncols):
                        ind = (row-r)*ncols + c
                        t2[row, c, slc] = parameterCombinations[corrMax[ind]][0]
                        b1[row, c, slc] = parameterCombinations[corrMax[ind]][1]
                        ff[row, c, slc] = parameterCombinations[corrMax[ind]][2]
                if DOPLOT >= 1:
                    plotImages()
            success = True
        except pycuda._driver.MemoryError:
            # Out of GPU memory: free everything we may hold, shrink the
            # chunk size, and retry the whole slice.
            ROWSTEP -= 1
            tryFree(slcGPU)
            tryFree(corrMatrixGPU)
            tryFree(ffValues_gpu)
            gc.collect()
            print("Not enough GPU Mem: decreasing ROWSTEP to", ROWSTEP)