def correlations(X, Y, useGPU):
    """Compute the four (cross-)correlation matrices of X and Y.

    Parameters
    ----------
    X, Y : 2-D arrays with the same number of rows (samples x features).
    useGPU : bool
        If True, compute on the GPU via pycuda/skcuda; otherwise use NumPy.

    Returns
    -------
    (cxx, cxy, cyx, cyy) : tuple of ndarrays
        cxx = X.T @ X, cxy = X.T @ Y, cyx = cxy.T, cyy = Y.T @ Y.
    """
    if useGPU:
        # GPU-only imports are kept local so the CPU path has no
        # pycuda/skcuda dependency.
        import pycuda.autoinit
        import pycuda.gpuarray as gpuarray
        import skcuda.linalg as linalg
        linalg.init()
        X_gpu = gpuarray.to_gpu(X)
        XT_gpu = linalg.transpose(X_gpu)
        cxx = linalg.mdot(XT_gpu, X_gpu).get()
        # Fix: the original recomputed XT_gpu here a second time, an
        # identical transpose of the same matrix; reuse the one above.
        # X itself is no longer needed once X^T is on the device.
        X_gpu.gpudata.free()
        del X_gpu
        Y_gpu = gpuarray.to_gpu(Y)
        cxy = linalg.mdot(XT_gpu, Y_gpu).get()
        cyx = cxy.T
        YT_gpu = linalg.transpose(Y_gpu)
        cyy = linalg.mdot(YT_gpu, Y_gpu).get()
    else:
        cxx = np.dot(X.T, X)
        cxy = np.dot(X.T, Y)
        cyx = cxy.T
        cyy = np.dot(Y.T, Y)
    return cxx, cxy, cyx, cyy
def test_mdot_matrix_complex128(self):
    """mdot of three complex128 matrices matches chained np.dot."""
    shapes = ((4, 2), (2, 2), (2, 2))
    host = [np.asarray(np.random.rand(*s), np.complex128) for s in shapes]
    device = [gpuarray.to_gpu(m) for m in host]
    d_gpu = linalg.mdot(*device)
    a, b, c = host
    assert np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get())
def cuda_dot(a, b):
    """Multiply two matrices on the GPU and return the product.

    Parameters
    ----------
    a, b : 2-D arrays with compatible inner dimensions.

    Returns
    -------
    matrix : a @ b, wrapped in `matrix` (defined elsewhere in this file).
    """
    # Debug trace kept intentionally; removed the commented-out
    # device-attribute dump / exit() scaffolding and the dead third-operand
    # code that was left over from a three-matrix variant.
    print("cuda_dot", a.shape, b.shape)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    d_gpu = linalg.mdot(a_gpu, b_gpu)
    return matrix(d_gpu.get())
def make_sample_data(set_: int):
    """Build a synthetic dataset, attach true nearest neighbours and an
    LSH hash column, and write it to parquet.

    set_ selects the distribution: 1 = uniform, 2 = mixture of 3
    Gaussians, 3 = mixture of 10 Gaussians. The random seed is derived
    from set_ so each dataset is reproducible.
    """
    np.random.seed(set_ * 4347)
    if set_ == 1:
        # Uniform distribution
        # NOTE(review): `samples` and `num_features` are module-level
        # globals not visible here — confirm they are defined.
        data = np.random.uniform(0, 1, size=(samples, num_features))
    if set_ == 2:
        # 3 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=3)
    if set_ == 3:
        # 10 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=10)
    df = pd.DataFrame()
    np.random.shuffle(data)
    df['vec'] = data.tolist()
    # find nearest neighbours
    # 51 neighbours are requested because each point is its own nearest
    # neighbour; x[1:...] below drops that self-match.
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=51, algorithm='ball_tree',
                            leaf_size=30).fit(data)
    _, nbrs_indices = nbrs.kneighbors(data)
    # One ground-truth column per neighbourhood size 10, 15, ..., 50.
    for n_nbr in range(10, 51, 5):
        df[f"known_neighbours_{n_nbr}"] = [
            x[1:(n_nbr + 1)] for x in nbrs_indices
        ]
    # hash using random hyperplane LSH
    import pycuda.gpuarray as gpuarray
    import skcuda.linalg as linalg
    import pycuda.autoinit
    linalg.init()
    # NOTE(review): CUDA_HOME is set *after* pycuda.autoinit / linalg.init
    # have already run — this assignment probably has no effect here;
    # confirm whether it can be removed or must be set before import.
    os.environ['CUDA_HOME'] = "/opt/cuda/"
    vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
    LSH = LSHBias(feature_dim=num_features, bits=LSH_NUM_BITS)
    W = np.array(LSH.W, dtype=np.float32)
    b_gpu = gpuarray.to_gpu(W)
    # Append a constant-1 column so the last row of W acts as a bias term.
    ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
    X = np.concatenate((vec_np, ones), axis=1)
    # do the matrix multiplication
    a_gpu = gpuarray.to_gpu(X)
    mul = linalg.mdot(a_gpu, b_gpu)
    # get binary: 1 if value >= 0, else 0
    res = gpuarray.if_positive(
        mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
        then_=gpuarray.ones_like(mul),
        else_=gpuarray.zeros_like(mul))
    res = np.array(res.get(), dtype=np.uint32)
    # convert grouped bits to integers
    res = np_array_binary_to_grouped_integers(res)
    df[f"hash_{LSH_NUM_BITS}_bits"] = [x for x in res]
    df.to_parquet(f"{config.CUDA_neighbour_search_df_dir}df-{set_}.parquet",
                  index=False)
    print("created test-data")
def fitSlcGPU(slc, srcFatT2, t2, b1, ff):
    """Fit one slice of the DICOM stack on the GPU by correlating each
    voxel's signal with a dictionary of precomputed signals, writing the
    best-matching parameters into t2, b1 and ff in place.

    The slice is processed in chunks of ROWSTEP rows; on a GPU
    out-of-memory error, ROWSTEP is reduced and the whole slice is
    retried from the start.

    NOTE(review): relies on module-level globals (dicomStack, ROWSTEP,
    parameterCombinations, signalsGPU, DOPLOT, tryFree, findmax_ff) —
    confirm against the full file.
    """
    global ROWSTEP
    print("Fitting slice", slc)
    yValues = dicomStack[:, :, slc, :].squeeze()
    slcShape = yValues.shape
    nrows = slcShape[0]
    ncols = slcShape[1]
    sigLen = slcShape[2]
    success = False
    ffParams_gpu = None
    ffValues_gpu = None
    # Fat-fraction handling is only enabled if this slice already has
    # any positive ff value.
    if np.any(ff[:,:,slc] > 0):
        useFF = True
        ffParams_gpu = findmax_ff.prepareAndLoadParams(parameterCombinations)
    else:
        useFF = False
    # Retry loop: shrink ROWSTEP until the per-chunk buffers fit in GPU
    # memory. NOTE(review): if ROWSTEP reaches 0 this loops forever —
    # confirm a lower bound is enforced elsewhere.
    while not success:
        try:
            for r in range(0,nrows,ROWSTEP):
                rowMax = min(r+ROWSTEP, nrows)
                # Flatten the chunk to (pixels, signal length) float32.
                slcLin = yValues[r:rowMax,:,:].reshape(
                    ncols*(rowMax-r), sigLen).astype(np.float32)
                slcGPU = None
                slcGPU = pycuda.gpuarray.to_gpu(slcLin)
                # Element-wise square of the signal before correlating.
                # NOTE(review): presumably intentional (power/energy) —
                # confirm against the signal model.
                slcGPU = sklinalg.multiply(slcGPU, slcGPU)
                corrMatrixGPU = sklinalg.mdot(slcGPU, signalsGPU) # correlation
                tryFree(slcGPU)
                if useFF:
                    ffValues_gpu = findmax_ff.prepareAndLoadFF(
                        ff[r:rowMax, :, slc])
                    corrMax = findmax_ff.findmax_gpu(
                        corrMatrixGPU, ffValues_gpu, ffParams_gpu)
                else:
                    # Best dictionary entry per pixel = argmax over axis 1.
                    corrMaxGPU = skmisc.argmax(corrMatrixGPU, 1)
                    corrMax = corrMaxGPU.get()
                    tryFree(corrMaxGPU)
                tryFree(corrMatrixGPU)
                tryFree(ffValues_gpu)
                # Scatter the winning parameter triple back into the
                # output volumes.
                for row in range(r, rowMax):
                    for c in range(ncols):
                        ind = (row-r)*ncols + c
                        t2[row,c,slc] = parameterCombinations[corrMax[ind]][0]
                        b1[row,c,slc] = parameterCombinations[corrMax[ind]][1]
                        ff[row,c,slc] = parameterCombinations[corrMax[ind]][2]
                if DOPLOT >= 1:
                    plotImages()
            success = True
        except pycuda._driver.MemoryError:
            # Out of GPU memory: free chunk buffers, shrink the chunk
            # size and retry the whole slice.
            ROWSTEP -= 1
            tryFree(slcGPU)
            tryFree(corrMatrixGPU)
            tryFree(ffValues_gpu)
            gc.collect()
            print("Not enough GPU Mem: decreasing ROWSTEP to", ROWSTEP)
def process(self, **kwargs):
    """Calculate the likelihood, returning ln(likelihood)."""
    # Default result: the floor value, returned on any invalid input.
    ret = {'value': LIKELIHOOD_FLOOR}
    self._fractions = kwargs.get('fractions', [])
    if not len(self._fractions):
        return ret
    self._model_observations = kwargs['model_observations']
    self._score_modifier = kwargs.get(self.key('score_modifier'), 0.0)
    self._upper_limits = np.array(kwargs.get('upperlimits', []),
                                  dtype=bool)
    value = ret['value']
    # Fractions must all lie in [0, 1].
    if min(self._fractions) < 0.0 or max(self._fractions) > 1.0:
        return ret
    # Any non-finite observation (unless it is an upper limit) kills
    # the likelihood.
    for oi, obs in enumerate(self._model_observations):
        if not self._upper_limits[oi] and (
                isnan(obs) or not np.isfinite(obs)):
            return ret
    diag = kwargs.get('kdiagonal', None)
    residuals = kwargs.get('kresiduals', None)
    if diag is None or residuals is None:
        return ret
    if kwargs.get('kmat', None) is not None:
        # Full covariance-matrix path.
        kmat = kwargs['kmat']
        # Add observed errors to diagonal
        kmat[np.diag_indices_from(kmat)] += diag
        # full_size = np.count_nonzero(kmat)
        # Remove small covariance terms
        # min_cov = self.MIN_COV_TERM * np.max(kmat)
        # kmat[kmat <= min_cov] = 0.0
        # print("Sparse frac: {:.2%}".format(
        #     float(full_size - np.count_nonzero(kmat)) / full_size))
        # Bail out on an ill-conditioned covariance matrix.
        condn = np.linalg.cond(kmat)
        if condn > 1.0e10:
            return ret
        # Try the GPU path unless the CPU has been explicitly forced.
        if self._use_cpu is not True and self._model._fitter._cuda:
            try:
                import pycuda.gpuarray as gpuarray
                import skcuda.linalg as skla
            except ImportError:
                # CUDA stack unavailable: fall back to CPU and report
                # once.
                self._use_cpu = True
                if not self._cuda_reported:
                    self._printer.message('cuda_not_enabled',
                                          master_only=True, warning=True)
            else:
                self._use_cpu = False
                if not self._cuda_reported:
                    self._printer.message('cuda_enabled', master_only=True)
                self._cuda_reported = True
                kmat_gpu = gpuarray.to_gpu(kmat)
                # kmat will now contain the cholesky decomp.
                skla.cholesky(kmat_gpu, lib='cusolver')
                value = -np.log(skla.det(kmat_gpu, lib='cusolver'))
                res_gpu = gpuarray.to_gpu(
                    residuals.reshape(len(residuals), 1))
                cho_mat_gpu = res_gpu.copy()
                skla.cho_solve(kmat_gpu, cho_mat_gpu, lib='cusolver')
                # Quadratic form -0.5 * r^T K^-1 r via the Cholesky
                # solve above.
                value -= (0.5 * (skla.mdot(skla.transpose(res_gpu),
                                           cho_mat_gpu)).get())[0][0]
        if self._use_cpu:
            try:
                # Cholesky route: cheap log-det plus triangular solve.
                chol_kmat = scipy.linalg.cholesky(kmat,
                                                  check_finite=False)
                value = -np.linalg.slogdet(chol_kmat)[-1]
                value -= 0.5 * (np.matmul(
                    residuals.T, scipy.linalg.cho_solve(
                        (chol_kmat, False), residuals,
                        check_finite=False)))
            except Exception:
                # Cholesky failed (e.g. not positive definite): fall
                # back to an explicit inverse + determinant.
                try:
                    value = -0.5 * (np.matmul(
                        np.matmul(residuals.T, scipy.linalg.inv(kmat)),
                        residuals) + np.log(scipy.linalg.det(kmat)))
                except scipy.linalg.LinAlgError:
                    return ret
        ret['kdiagonal'] = diag
        ret['kresiduals'] = residuals
    elif 'kfmat' in kwargs:
        raise RuntimeError('Should not have kfmat in likelihood!')
    else:
        # Shortcut when matrix is diagonal.
        self._o_band_vs = kwargs['obandvs']
        # print('likelihood')
        # print(np.sqrt(diag))
        # print(self._o_band_vs)
        # print(residuals)
        value = -0.5 * np.sum(
            residuals**2 / (self._o_band_vs**2 + diag) +
            np.log(self._o_band_vs**2 + diag))
    score = self._score_modifier + value
    if isnan(score) or not np.isfinite(score):
        return ret
    ret['value'] = max(LIKELIHOOD_FLOOR, score)
    return ret
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy as np
import skcuda.linalg as linalg

# Demo: verify GPU mdot of three float32 matrices against chained np.dot.
linalg.init()
a = np.asarray(np.random.rand(4, 2), np.float32)
b = np.asarray(np.random.rand(2, 2), np.float32)
c = np.asarray(np.random.rand(2, 2), np.float32)
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = gpuarray.to_gpu(c)
d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
# Fix: `print x` is Python 2 syntax and a SyntaxError on Python 3.
print(np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get()))
b_gpu = gpuarray.to_gpu(W) # reuse this every time count = 0 # hashing different .orc DataFrames for filename in tqdm(glob(basepath + "part-*.orc")): df = pd.read_orc(filename) df = df.rename(columns={"FeatureVector_all_features": "vec"}) count += 1 vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32) # add bias term ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32) X = np.concatenate((vec_np, ones), axis=1) # do the matrix multiplication a_gpu = gpuarray.to_gpu(X) mul = linalg.mdot(a_gpu, b_gpu) # get binary: 1 if value >= 0, else 0 res = gpuarray.if_positive( mul >= gpuarray.zeros(mul.shape, dtype=np.float32), then_=gpuarray.ones_like(mul), else_=gpuarray.zeros_like(mul)) res = np.array(res.get(), dtype=np.uint32) # convert grouped bits to integers res = np_array_binary_to_grouped_integers(res) df[f"hash_{LSH_NUM_BITS}_bits"] = [x for x in res] df = df[["rec_MBID", f"hash_{LSH_NUM_BITS}_bits"]] df.to_parquet(f"{config.ABz_GPU_hashed_output_dir}{count}.parquet", index=False)
import pycuda.autoinit
# Fix: gpuarray was used below but never imported (NameError at runtime).
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as linalg
import skcuda.misc as cumisc

# Demo: check GPU mdot against chained np.dot for each supported dtype.
linalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string  # NOTE(review): appears unused — candidate for removal.
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing multiple matrix multiplication for type ' +
          str(np.dtype(t)))
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(8, 4) + 1j * np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4) + 1j * np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4) + 1j * np.random.rand(4, 4), t)
    else:
        a = np.asarray(np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4), t)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = gpuarray.to_gpu(c)
    d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
    print('Success status: ',
          np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get()))
def cuda_dot2(a, b, c):
    """Return the triple matrix product a @ b @ c computed on the GPU."""
    on_device = [gpuarray.to_gpu(m) for m in (a, b, c)]
    product_gpu = linalg.mdot(*on_device)
    return product_gpu.get()
def cuda_dot(a, b):
    """Multiply two matrices on the GPU.

    Parameters
    ----------
    a, b : 2-D arrays with compatible inner dimensions.

    Returns
    -------
    matrix : a @ b, wrapped in `matrix` (defined elsewhere in this file).
    """
    # Removed the dead commented-out third-operand code left over from a
    # three-matrix variant (see cuda_dot2).
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    d_gpu = linalg.mdot(a_gpu, b_gpu)
    return matrix(d_gpu.get())