def squared_loss(y_true, y_pred):
    """Compute the squared loss for regression on GPU arrays.

    Parameters
    ----------
    y_true : GPU array
        Ground truth (correct) values. Its ``shape``, ``dtype``, ``size``
        and ``gpudata`` attributes are used.
    y_pred : GPU array
        Predicted values, as returned by a regression estimator. Only its
        ``gpudata`` attribute is used.

    Returns
    -------
    loss : float
        Half of the mean of the buffer filled by the squared-error kernel.
    """
    n_elements = y_true.size

    # Scratch buffer that receives the kernel output (same shape and dtype
    # as ``y_true``).
    residuals_gpu = gpuarray.GPUArray(y_true.shape, y_true.dtype)

    # float64 inputs go to the double kernel; everything else goes to the
    # single-precision variant.
    if y_true.dtype == np.float64:
        kernel = cuSquaredError
    else:
        kernel = cuSquaredErrorf

    # One thread per element: enough blocks of ``blockSize`` threads to
    # cover ``n_elements``.
    kernel(y_true.gpudata, y_pred.gpudata, residuals_gpu.gpudata,
           np.int32(n_elements),
           block=(blockSize, 1, 1),
           grid=(int((n_elements - 1) / blockSize + 1), 1, 1))

    mean_value = float(cumisc.mean(residuals_gpu).get())
    return mean_value / 2
def computeLambda(self):
    """Return the largest row-wise Euclidean norm of ``data.T . M``.

    ``M`` is ``self.data`` itself when ``self.affine`` is false. Otherwise
    it is ``mean - data``, where ``mean`` is the per-row mean of
    ``self.data`` (``axis=1``) replicated over every column.

    Reads ``self.GPU``, ``self.affine`` and ``self.data``; the GPU affine
    path and the CPU affine path also read ``self.num_rows`` and/or
    ``self.num_columns``.

    Returns
    -------
    _lambda : scalar
        ``np.amax`` over the vector of row norms.
    """
    print('\t\tComputing lambda...')

    if self.GPU == True:  # noqa: E712 - keep the original equality test
        gpu_data = gpuarray.to_gpu(self.data)

        if not self.affine:
            C_gpu = linalg.dot(gpu_data, gpu_data, transa='T')
        else:
            # Affine transformation: fetch the per-row mean from the device
            # once, instead of once per column.
            y_mean = misc.mean(gpu_data, axis=1).get()

            # Matrix holding the mean in every column, from which the data
            # is subtracted (may encounter problems with strides).
            aff_mat = np.zeros([self.num_rows, self.num_columns]).astype('f')
            for i in range(self.num_columns):
                aff_mat[:, i] = y_mean

            aff_mat_gpu = gpuarray.to_gpu(aff_mat)
            gpu_data_aff = misc.subtract(aff_mat_gpu, gpu_data)
            C_gpu = linalg.dot(gpu_data, gpu_data_aff, transa='T')

        # Euclidean norm of every row of the product.
        T = np.zeros(self.num_columns)
        for i in range(self.num_columns):
            T[i] = linalg.norm(C_gpu[i, :])
    else:
        if not self.affine:
            product = np.dot(self.data.T, self.data)
        else:
            # Affine transformation on the host.
            y_mean = np.mean(self.data, axis=1)
            tmp_mat = np.outer(y_mean, np.ones(self.num_columns)) - self.data
            product = np.dot(self.data.T, tmp_mat)
        T = np.linalg.norm(product, axis=1)

    _lambda = np.amax(T)
    return _lambda
def _impl_test_mean(self, dtype):
    """Compare ``misc.mean`` on the GPU with ``ndarray.mean`` for ``dtype``.

    A random 3x5 array is checked in C order (full mean, axis 0, axis 1)
    and then in Fortran order (full mean, axis -1, axis -2), using the
    per-dtype tolerances from ``dtype_to_rtol`` / ``dtype_to_atol``.
    """
    x = np.random.normal(scale=5.0, size=(3, 5))

    cases = (
        ('C', (0, 1)),
        ('F', (-1, -2)),
    )
    for order, axes in cases:
        x = x.astype(dtype=dtype, order=order)
        x_gpu = gpuarray.to_gpu(x)

        # Mean over all elements.
        assert_allclose(misc.mean(x_gpu).get(), x.mean(),
                        rtol=dtype_to_rtol[dtype],
                        atol=dtype_to_atol[dtype])

        # Mean along each individual axis.
        for axis in axes:
            assert_allclose(misc.mean(x_gpu, axis=axis).get(),
                            x.mean(axis=axis),
                            rtol=dtype_to_rtol[dtype],
                            atol=dtype_to_atol[dtype])
def _correlate_matmul_cublas(self, frames_flat, mask):
    """Correlate the masked frames using cuBLAS matrix products.

    The columns of ``frames_flat`` selected by ``mask`` are uploaded as
    float32. Two products are formed: the array times its own transpose,
    and the per-row means (``axis=1``) times their transpose. Both are
    reduced with ``self.sum_diagonals`` into ``self.d_sumdiags1`` and
    ``self.d_sumdiags2``. The first buffer is then divided in place by the
    second and by the number of selected columns.

    Returns
    -------
    The host copy of ``self.d_sumdiags1``.
    """
    masked = np.ascontiguousarray(frames_flat[:, mask], dtype=np.float32)
    n_pixels = masked.shape[1]

    # Pre-allocating device memory for all bins might save a bit of time,
    # but would take more memory.
    d_frames = garray.to_gpu(masked)

    # Numerator: frames . frames^T
    d_numerator = cublas.dot(d_frames, d_frames, transb="T",
                             handle=self.cublas_handle)

    # Denominator: means . means^T, with one mean per row of the frames.
    d_row_means = skmisc.mean(d_frames, axis=1, keepdims=True)
    d_denominator = cublas.dot(d_row_means, d_row_means, transb="T",
                               handle=self.cublas_handle)

    self.sum_diagonals(d_numerator, self.d_sumdiags1)
    self.sum_diagonals(d_denominator, self.d_sumdiags2)

    # Normalise in place on the device.
    self.d_sumdiags1 /= self.d_sumdiags2
    self.d_sumdiags1 /= n_pixels

    return self.d_sumdiags1.get()
def _compute_loss_grad(self, layer, n_samples, activations, deltas,
                       coef_grads, intercept_grads):
    """Compute the loss gradient for the weights and intercepts of one layer.

    This performs the backpropagation step for ``layer`` only:

    * ``coef_grads[layer]`` becomes
      ``(activations[layer]^T . deltas[layer] + alpha * coefs_[layer])
      / n_samples``;
    * ``intercept_grads[layer]`` becomes the mean of ``deltas[layer]``
      along axis 0.

    Both containers are updated in place and also returned.
    """
    layer_deltas = deltas[layer]

    # Unregularised gradient: activations^T . deltas
    coef_grads[layer] = safe_sparse_dot(activations[layer], layer_deltas,
                                        'T', 'N')

    # Add the L2 penalty term, then average over the samples.
    penalty = self.alpha * self.coefs_[layer]
    coef_grads[layer] = (coef_grads[layer] + penalty) / n_samples

    intercept_grads[layer] = cumisc.mean(layer_deltas, 0)

    return coef_grads, intercept_grads
def impl_test_mean(self, dtype):
    """Check ``misc.mean`` on the GPU against ``ndarray.mean`` for ``dtype``.

    A random 3x5 array is checked in C order (full mean, axis 0, axis 1)
    and then in Fortran order (full mean, axis -1, axis -2), using
    ``np.allclose`` with its default tolerances.
    """
    x = np.random.normal(scale=5.0, size=(3, 5))

    cases = (
        ('C', (0, 1)),
        ('F', (-1, -2)),
    )
    for order, axes in cases:
        x = x.astype(dtype=dtype, order=order)
        x_gpu = gpuarray.to_gpu(x)

        # Mean over all elements.
        assert np.allclose(misc.mean(x_gpu).get(), x.mean())

        # Mean along each individual axis.
        for axis in axes:
            assert np.allclose(misc.mean(x_gpu, axis=axis).get(),
                               x.mean(axis=axis))
def run_gpu(self, Niters):
    """Run ``Niters`` G-S iterations on the GPU.

    Nothing is returned: after every iteration the refined wave is stored
    in ``self.wdata`` (a pycuda array), and ``self.cuphase`` is called with
    the new wave, the previous ``self.wdata`` and ``self.phase_data``.
    """
    Nz, Ny, Nx = self.shape

    # Device buffers: exit-plane wave, through-focus stack and work stack.
    exit_wave = gpuarray.empty((Ny, Nx), np.complex64)
    psi = gpuarray.empty((Nz, Ny, Nx), np.complex64)
    work = gpuarray.empty((Nz, Ny, Nx), np.complex64)

    for _iteration in trange(Niters):
        # Propagate the current wave to simulate the defocused waves:
        # Psi(x,y,z) = convolve[Psi(x,y,0), CTF(x,y,z)]
        cu_fft.fft(self.wdata, exit_wave, self.pft2dcc)
        for z in range(Nz):
            psi[z, :, :] = self.ctfd[z, :, :] * exit_wave
        cu_fft.ifft(psi, psi, self.pft3dcc, True)

        if hasattr(self, 'Esdata'):
            # Use the intensities, Isim = |Psi|**2, and convolve them with
            # the spatial-coherence envelope: Isim = convolve[Isim, Es]
            work = psi * psi.conj()
            cu_fft.fft(work, work, self.pft3dcc)
            cu_fft.ifft(work * self.Esdata, work, self.pft3dcc, True)
            # Combine experimental and simulated amplitudes with the
            # simulated phase:
            # Psi' = [abs(Psi)+sqrt(Iexp)-sqrt(Isim)]*exp[i*arg(Psi)]
            self.cuwave(self.Iexp, psi, work.real, work)
        else:
            # Combine experimental amplitudes with the simulated phase:
            # Psi' = [sqrt(Iexp)]*exp[i*arg(Psi)]
            self.cuwave(self.Iexp, psi, work)

        # NOTE(review): assumed to run after both branches above - confirm.
        psi = gpuarray.if_positive(self.mask, work, psi)

        # Back-propagate to the exit plane and average over z:
        # Psi(x,y,0) = < convolve[Psi, CTF*] >_z
        cu_fft.fft(psi, psi, self.pft3dcc)
        psi = psi * self.ctfd.conj()
        cu_fft.ifft(psi, psi, self.pft3dcc, True)
        exit_wave = misc.mean(psi.reshape(Nz, Nx * Ny), 0).reshape(Ny, Nx)

        # Update the phase and the stored wave.
        self.cuphase(exit_wave, self.wdata, self.phase_data)
        self.wdata = exit_wave.copy()
from pycuda import gpuarray
from pycuda import cumath

# I. Modules that are subject to monkey patching:
from ..particles import particles as def_particles
from ..particles import slicing as def_slicing
from ..trackers import simple_long_tracking as def_simple_long_tracking
from ..trackers import wrapper as def_wrapper

# II. Actual monkey patching

# a) Particles rebindings for GPU
from .particles import ParticlesGPU

def_particles.Particles = ParticlesGPU
# The replacement mean/std forward all arguments and call .get() on the
# result before returning it.
# NOTE(review): `mean` and `std` are not bound by any statement visible
# here; presumably they are provided elsewhere in this module - confirm.
def_particles.mean = lambda *args, **kwargs: mean(*args, **kwargs).get()
def_particles.std = lambda *args, **kwargs: std(*args, **kwargs).get()

# b) Slicing rebindings for GPU (currently disabled)
# def_slicing.min_ = lambda *args, **kwargs: gpuarray.min(*args, **kwargs).get()
# def_slicing.max_ = lambda *args, **kwargs: gpuarray.max(*args, **kwargs).get()
# def_slicing.diff = diff

from .slicing import SlicerGPU

# # to be replaced: find a better solution than monkey patching base classes!
# # (to solve the corresponding need to replace all inheriting classes' parent!)
# def_slicing.Slicer = SlicerGPU
# def_slicing.UniformBinSlicer.__bases__ = (SlicerGPU,)

# c) Longitudinal tracker rebindings for GPU
b_host = b_host.astype(np.float32) size_rgb = m.shape # Convierte la pancromatica a float32 p1_host = p.astype(np.float32) # Inicial el tiempo de ejecucion start = time.time() # Se pasan los array en el host al device r_gpu = gpuarray.to_gpu(r_host) g_gpu = gpuarray.to_gpu(g_host) b_gpu = gpuarray.to_gpu(b_host) p_gpu = gpuarray.to_gpu(p1_host) # Se calcula la media de cada una de las bandas y se forma un arreglo con estos valores, todo esto en GPU mean_r_gpu = misc.mean(r_gpu) mean_g_gpu = misc.mean(g_gpu) mean_b_gpu = misc.mean(b_gpu) # Se obtiene el numero de bandas n_bands = size_rgb[2] # Se aparta memoria en GPU r_gpu_subs = gpuarray.zeros_like(r_gpu, np.float32) g_gpu_subs = gpuarray.zeros_like(g_gpu, np.float32) b_gpu_subs = gpuarray.zeros_like(b_gpu, np.float32) # Se realiza la resta de su respectiva media a cada uno de los pixeles de cada banda, substract(r_gpu, mean_r_gpu.get(), r_gpu_subs) substract(g_gpu, mean_g_gpu.get(), g_gpu_subs) substract(b_gpu, mean_b_gpu.get(), b_gpu_subs)
def fusion_images(multispectral, panchromatic, save_image=False, savepath=None, timeCondition=True):
    """Fuse a 3-band multispectral image with a panchromatic image (PCA).

    The per-band mean subtraction and the final negative-value adjustment
    run on the GPU. The remaining steps are delegated to helper functions
    defined elsewhere in this module; the comments below describe their
    intent as stated by the original author.

    Parameters
    ----------
    multispectral : ndarray
        Image whose ``shape[2]`` must be 3.
    panchromatic : ndarray
        Image whose ``shape`` must have exactly two entries.
    save_image : bool, optional
        When true, the result is written as ``pcagpu_image.tif``.
    savepath : str or None, optional
        Directory for the saved image; the current directory when ``None``.
    timeCondition : bool, optional
        When true, the elapsed time is returned together with the image.

    Returns
    -------
    dict or ndarray
        ``{"image": fused, "time": seconds}`` when ``timeCondition`` is
        true, otherwise only the fused ``uint8`` image (bands stacked
        along axis 2).

    Raises
    ------
    SystemExit
        If either input fails the shape checks above.
    """
    # Check that both images satisfy the expected layout.
    if multispectral.shape[2] != 3:
        sys.exit('The first image is not multispectral')
    print('The Multispectral image has '+str(multispectral.shape[2])+' channels and size of '+str(multispectral.shape[0])+'x'+str(multispectral.shape[1]))

    if len(panchromatic.shape) != 2:
        sys.exit('The second image is not panchromatic')
    print(' The Panchromatic image has a size of '+str(panchromatic.shape[0])+'x'+str(panchromatic.shape[1]))

    size_rgb = multispectral.shape
    # Number of bands.
    n_bands = size_rgb[2]
    # Side length of the square sub-matrices handed to the helpers.
    BLOCK_SIZE = 32

    # Convert to float32 and separate the RGB bands of the multispectral
    # image; the panchromatic image is converted as well.
    m_host = multispectral.astype(np.float32)
    r_host = m_host[:, :, 0].astype(np.float32)
    g_host = m_host[:, :, 1].astype(np.float32)
    b_host = m_host[:, :, 2].astype(np.float32)
    panchromatic_host = panchromatic.astype(np.float32)

    # Start of the timed region.
    start = time.time()

    # Copy the bands from the host to the device. The panchromatic image is
    # only consumed on the host, so it is not uploaded.
    r_gpu = gpuarray.to_gpu(r_host)
    g_gpu = gpuarray.to_gpu(g_host)
    b_gpu = gpuarray.to_gpu(b_host)

    # Mean of every band, computed on the GPU.
    mean_r_gpu = misc.mean(r_gpu)
    mean_g_gpu = misc.mean(g_gpu)
    mean_b_gpu = misc.mean(b_gpu)

    # Device buffers for the mean-subtracted bands.
    r_gpu_subs = gpuarray.zeros_like(r_gpu, np.float32)
    g_gpu_subs = gpuarray.zeros_like(g_gpu, np.float32)
    b_gpu_subs = gpuarray.zeros_like(b_gpu, np.float32)

    # Subtract each band's mean from every one of its pixels.
    substract(r_gpu, mean_r_gpu.get(), r_gpu_subs)
    substract(g_gpu, mean_g_gpu.get(), g_gpu_subs)
    substract(b_gpu, mean_b_gpu.get(), b_gpu_subs)

    # Split every mean-subtracted band into square sub-matrices of the
    # block size.
    r_subs_split = split(r_gpu_subs.get(), BLOCK_SIZE, BLOCK_SIZE)
    g_subs_split = split(g_gpu_subs.get(), BLOCK_SIZE, BLOCK_SIZE)
    b_subs_split = split(b_gpu_subs.get(), BLOCK_SIZE, BLOCK_SIZE)

    # Variance-covariance matrix, scaled by 1 / (number of pixels - 1).
    mat_var_cov = varianza_cov(r_subs_split, g_subs_split, b_subs_split)
    coefficient = 1.0/((size_rgb[0]*size_rgb[1])-1)
    ortogonal_matrix = mat_var_cov*coefficient

    # Traces of the successive powers of the matrix, then the coefficients
    # of its characteristic polynomial.
    polynomial_trace = successive_powers(ortogonal_matrix)
    characteristic_polynomial = polynomial_coefficients(polynomial_trace, ortogonal_matrix)

    # Roots of the characteristic polynomial (leading coefficient 1
    # prepended); they are placed on the diagonal of eigenvalues_mat.
    characteristic_polynomial_roots = np.roots(np.insert(characteristic_polynomial, 0, 1))
    eigenvalues_mat = np.diag(characteristic_polynomial_roots)

    # Starting vector for the eigenvector computation.
    eigenvectors_mat = -1*ortogonal_matrix[1:n_bands, 0]

    # Normalised eigenvectors; only the second returned matrix is used.
    _, q_matrix = eigenvectors_norm(eigenvalues_mat, ortogonal_matrix, eigenvectors_mat)
    q_matrix_cpu = np.array(q_matrix.tolist()).astype(np.float32)

    # Eigenvector rows, with the sign of the second one flipped.
    w1 = q_matrix_cpu[0, :]
    w2 = (-1)*q_matrix_cpu[1, :]
    w3 = q_matrix_cpu[2, :]
    eigenvectors = np.array((w1, w2, w3))

    # Inverse of the eigenvector matrix, as float32.
    inv_eigenvectors = la.inv(eigenvectors)
    inv_eigenvector_cpu = np.array(inv_eigenvectors.tolist()).astype(np.float32)

    # Split the original bands into sub-matrices of the block size.
    r_subs_split_cp = split(r_host, BLOCK_SIZE, BLOCK_SIZE)
    g_subs_split_cp = split(g_host, BLOCK_SIZE, BLOCK_SIZE)
    b_subs_split_cp = split(b_host, BLOCK_SIZE, BLOCK_SIZE)

    # Principal components from the original bands and the eigenvectors;
    # the first component is not used afterwards.
    _, pc_2, pc_3 = componentes_principales_original(r_subs_split_cp, g_subs_split_cp, b_subs_split_cp, q_matrix_cpu, r_host.shape[0], BLOCK_SIZE)

    # Split the panchromatic image and principal components 2 and 3.
    p_subs_split_nb = split(panchromatic_host, BLOCK_SIZE, BLOCK_SIZE)
    pc_2_subs_split_nb = split(pc_2, BLOCK_SIZE, BLOCK_SIZE)
    pc_3_subs_split_nb = split(pc_3, BLOCK_SIZE, BLOCK_SIZE)

    # New bands from the panchromatic image, principal components 2 and 3,
    # and the inverse of the eigenvectors.
    nb1, nb2, nb3 = componentes_principales_panchromartic(p_subs_split_nb, pc_2_subs_split_nb, pc_3_subs_split_nb, inv_eigenvector_cpu, r_host.shape[0], BLOCK_SIZE)

    nb11_gpu = gpuarray.to_gpu(nb1.astype(np.float32))
    nb22_gpu = gpuarray.to_gpu(nb2.astype(np.float32))
    nb33_gpu = gpuarray.to_gpu(nb3.astype(np.float32))

    # Device buffers for the adjusted bands.
    nb111_gpu = gpuarray.empty_like(nb11_gpu)
    nb222_gpu = gpuarray.empty_like(nb22_gpu)
    nb333_gpu = gpuarray.empty_like(nb33_gpu)

    # Adjust pixels whose value is below 0, on the GPU.
    negative_adjustment(nb11_gpu, nb111_gpu)
    negative_adjustment(nb22_gpu, nb222_gpu)
    negative_adjustment(nb33_gpu, nb333_gpu)

    nb111_cpu = nb111_gpu.get().astype(np.uint8)
    nb222_cpu = nb222_gpu.get().astype(np.uint8)
    nb333_cpu = nb333_gpu.get().astype(np.uint8)

    # End of the timed region (stacking and saving are not timed).
    end = time.time()

    fusioned_image = np.stack((nb111_cpu, nb222_cpu, nb333_cpu), axis=2)

    if save_image:
        # Save the result under `savepath` when given, otherwise in the
        # current directory.
        if savepath is not None:
            skimage.io.imsave(savepath+'/pcagpu_image.tif', fusioned_image, plugin='tifffile')
        else:
            skimage.io.imsave('pcagpu_image.tif', fusioned_image, plugin='tifffile')

    # Elapsed time of the GPU-assisted PCA fusion.
    time_calculated = (end-start)

    if timeCondition:
        return {"image": fusioned_image, "time": time_calculated}
    return fusioned_image