def convert_to_cuDFs(data, labels):
    """Build cuDF DataFrames for data and labels from cupy arrays.

    Columns 0, 1 and 2 of ``data`` become the ``x``, ``y`` and ``z`` columns
    (each passed through ``cupy.asfortranarray``); ``labels`` becomes the
    single ``labels`` column.

    Returns:
        tuple: ``(data_cDF, labels_cDF)``.
    """
    # TODO: consider dlpack
    label_frame = cudf.DataFrame([('labels', labels)])
    xyz_columns = [
        (axis_name, cupy.asfortranarray(data[:, axis_idx]))
        for axis_idx, axis_name in enumerate(('x', 'y', 'z'))
    ]
    coord_frame = cudf.DataFrame(xyz_columns)
    return coord_frame, label_frame
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Dispatch, in order: scalar, CSR matrix, CSC matrix, any other sparse
    matrix (converted with ``tocsr()``), dense array of 0, 1 or 2 dimensions.
    ``sum_duplicates()`` is called on the operands before every product.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        return cusparse.csrgemm(self, other)

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        # The CSC operand is handed over as its transpose.
        return cusparse.csrgemm(self, other.T, transb=True)

    if base.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    ndim = other.ndim
    if ndim not in (0, 1, 2):
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()
    if ndim == 0:
        return self._with_data(self.data * other)
    dense = cupy.asfortranarray(other)
    if ndim == 1:
        return cusparse.csrmv(self, dense)
    return cusparse.csrmm2(self, dense)
def sgemm(A, B,
          dim_x=16, dim_y=16, blk_m=64, blk_n=64, blk_k=4,
          dim_xa=64, dim_ya=4, dim_xb=4, dim_yb=64):
    """Run the ``sgemm`` raw kernel on two float32 matrices.

    Args:
        A (cupy.ndarray): float32 matrix of shape ``(m, k)``.
        B (cupy.ndarray): float32 matrix of shape ``(k, n)``.
        dim_x, dim_y: Thread-block dimensions used for the launch.
        blk_m, blk_n, blk_k: Tile sizes substituted into the kernel source.
        dim_xa, dim_ya, dim_xb, dim_yb: Additional kernel template
            parameters; each pair must multiply to ``dim_x * dim_y``.

    Returns:
        cupy.ndarray: The float32, Fortran-ordered ``(m, n)`` output ``C``
        filled by the kernel (presumably ``A @ B`` -- confirm against the
        kernel source).

    Raises:
        AssertionError: If a dtype is not float32 or the thread-count
            products disagree.
        ValueError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert(dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k_b, n = B.shape
    # Previously ``k`` was silently overwritten by B's first dimension, so a
    # mismatch launched the kernel with an inconsistent ``k``.
    if k != k_b:
        raise ValueError(
            'inner dimensions do not match (A: {}, B: {})'.format(
                A.shape, B.shape))

    # Inputs matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)

    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {'DIM_X': dim_x, 'DIM_Y': dim_y,
              'BLK_M': blk_m, 'BLK_N': blk_n, 'BLK_K': blk_k,
              'DIM_XA': dim_xa, 'DIM_YA': dim_ya,
              'DIM_XB': dim_xb, 'DIM_YB': dim_yb,
              'THR_M': blk_m // dim_x, 'THR_N': blk_n // dim_y}
    code = read_code(sgemm_file, params=config)
    kern = cp.RawKernel(code, 'sgemm')

    # One block per (blk_m x blk_n) tile of the output.
    grid = (int(math.ceil(m / blk_m)), int(math.ceil(n / blk_n)), 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    # NOTE(review): the factor 4 is presumably sizeof(float32) and the "+ 1"
    # terms tile padding -- confirm against the kernel source.
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    For the cuSPARSE calls this matrix is passed either as its transpose
    (``self.T`` with ``transa=True``) or converted with ``tocsr()``,
    depending on which routine ``cusparse.check_availability`` reports.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        NotImplementedError: If no suitable cuSPARSE routine is available.
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if cupyx.scipy.sparse.isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            lhs = self.T
            return cusparse.csrgemm(lhs, other, transa=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            lhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, other)
        raise NotImplementedError

    if isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            lhs = self.T
            rhs = other.T
            return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            rhs = other.tocsr()
            lhs.sum_duplicates()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, rhs)
        raise NotImplementedError

    if cupyx.scipy.sparse.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    if other.ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)
    if other.ndim not in (1, 2):
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()
    # Matrix-vector routines for 1-D operands, matrix-matrix for 2-D ones;
    # the first available routine in each pair wins.
    if other.ndim == 1:
        candidates = ('csrmv', 'spmv')
    else:
        candidates = ('csrmm2', 'spmm')
    for routine_name in candidates:
        if cusparse.check_availability(routine_name):
            routine = getattr(cusparse, routine_name)
            break
    else:
        raise NotImplementedError
    return routine(self.T, cupy.asfortranarray(other), transa=True)
def generate_synthetic_dataset(config):
    """Generate an interleaved two-coil point cloud with binary labels.

    Args:
        config (dict): Must provide ``coilType``, ``nSamples``,
            ``coilDensity``, ``coil1StDev``, ``coil2StDev``,
            ``nGuidePointsPerCoil``, ``randomSeed`` and ``shuffleFlag``.

    Returns:
        tuple: ``(data, labels, elapsedTime)`` where ``data`` is a cuDF
        DataFrame with columns ``x``, ``y``, ``z``, ``labels`` is a cuDF
        DataFrame with the single column ``labels`` (1 for coil 1, 0 for
        coil 2) and ``elapsedTime`` is the wall-clock duration in seconds.
    """
    coil_type = config['coilType']
    n_samples = config['nSamples']
    coil_density = config['coilDensity']
    stdev_a = config['coil1StDev']
    stdev_b = config['coil2StDev']
    n_guide_points = config['nGuidePointsPerCoil']
    seed = config['randomSeed']
    shuffle = config['shuffleFlag']

    t_start = time.time()

    centers_a, centers_b = gen_two_coils(nPoints=n_guide_points,
                                         coilType=coil_type,
                                         coilDensity=coil_density)

    per_coil = n_samples // 2
    n_dims = 3

    # Sample one blob set per coil around its guide points.
    blobs = []
    for centers, stdev in ((centers_a, stdev_a), (centers_b, stdev_b)):
        points, _ = cuml.make_blobs(n_samples=per_coil,
                                    n_features=n_dims,
                                    centers=centers,
                                    cluster_std=stdev,
                                    random_state=seed,
                                    dtype='float')
        blobs.append(points)

    total = per_coil * 2

    # Interleave the two coils: even rows are coil 1, odd rows are coil 2.
    combined = cupy.empty(shape=(total, n_dims), dtype='float32', order='F')
    combined[0::2] = blobs[0]
    combined[1::2] = blobs[1]

    combined_labels = cupy.empty(shape=(total, 1), dtype='int', order='F')
    combined_labels[0::2] = cupy.ones(shape=(per_coil, 1), dtype='int')
    combined_labels[1::2] = cupy.zeros(shape=(per_coil, 1), dtype='int')

    if shuffle:
        cupy.random.seed(seed)
        permutation = cupy.random.permutation(combined.shape[0])
        combined = cupy.asfortranarray(combined[permutation, :])
        combined_labels = cupy.asfortranarray(combined_labels[permutation])

    data = cudf.DataFrame.from_gpu_matrix(combined, columns=['x', 'y', 'z'])
    labels = cudf.DataFrame.from_gpu_matrix(combined_labels,
                                            columns=['labels'])
    return data, labels, time.time() - t_start
def _solve(a, b, cublas_handle, cusolver_handle):
    """Solve ``a x = b`` through a QR decomposition.

    Both operands are converted with ``cupy.asfortranarray`` and then
    overwritten by the cuSOLVER/cuBLAS routines; the array holding the
    solution is returned.

    Raises:
        NotImplementedError: If the dtype of ``a`` is not one of
            ``'f'``, ``'d'``, ``'F'`` or ``'D'``.
    """
    a = cupy.asfortranarray(a)
    b = cupy.asfortranarray(b)
    dtype = a.dtype
    if b.ndim == 1:
        m, k = b.size, 1
    else:
        m, k = b.shape
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # (dtype code, routine prefix, name of the operation passed to ormqr)
    variants = (
        ('f', 's', 'CUBLAS_OP_T'),
        ('d', 'd', 'CUBLAS_OP_T'),
        ('F', 'c', 'CUBLAS_OP_C'),
        ('D', 'z', 'CUBLAS_OP_C'),
    )
    for code, prefix, trans_name in variants:
        if dtype == code:
            break
    else:
        raise NotImplementedError(dtype)

    geqrf = getattr(cusolver, prefix + 'geqrf')
    geqrf_bufferSize = getattr(cusolver, prefix + 'geqrf_bufferSize')
    ormqr = getattr(cusolver, prefix + 'ormqr')
    trans = getattr(cublas, trans_name)
    trsm = getattr(cublas, prefix + 'trsm')

    a_ptr = a.data.ptr
    b_ptr = b.data.ptr

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a_ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(
        cusolver_handle, m, m, a_ptr, m, tau.data.ptr, workspace.data.ptr,
        buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    # 2. ormqr (Q^T * B)
    ormqr(
        cusolver_handle, cublas.CUBLAS_SIDE_LEFT, trans, m, k, m, a_ptr, m,
        tau.data.ptr, b_ptr, m, workspace.data.ptr, buffersize,
        dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        ormqr, dev_info)

    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(
        cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
        cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT,
        m, k, 1, a_ptr, m, b_ptr, m)
    return b
def dense2csr(x):
    """Converts a dense matrix in CSR format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupy.sparse.csr_matrix: A converted matrix.

    """
    assert x.ndim == 2
    dense = cupy.asfortranarray(x)
    total_nnz = numpy.empty((), dtype='i')  # host-side scalar output
    handle = device.get_cusparse_handle()
    n_rows, n_cols = dense.shape

    descr = MatDescriptor.create()
    row_counts = cupy.empty(n_rows, 'i')
    # First pass: count the nonzeros per row and in total.
    _call_cusparse(
        'nnz', dense.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_ROW, n_rows, n_cols,
        descr.descriptor, dense.data.ptr, n_rows, row_counts.data.ptr,
        total_nnz.ctypes.data)

    total_nnz = int(total_nnz)
    values = cupy.empty(total_nnz, dense.dtype)
    row_ptr = cupy.empty(n_rows + 1, 'i')
    col_indices = cupy.empty(total_nnz, 'i')

    # Second pass: fill the CSR buffers.
    _call_cusparse(
        'dense2csr', dense.dtype,
        handle, n_rows, n_cols, descr.descriptor,
        dense.data.ptr, n_rows, row_counts.data.ptr,
        values.data.ptr, row_ptr.data.ptr, col_indices.data.ptr)
    # Note that a descriptor is recreated
    return cupy.sparse.csr_matrix(
        (values, col_indices, row_ptr), shape=dense.shape)
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A',
          plan=None, overwrite_x=False, out=None):
    """Validate the arguments, lay out ``a`` and run the N-d FFT.

    ``a`` is returned unchanged when ``_prep_fftn_axes`` yields no axes.
    With ``order='A'`` the memory order follows the input (F if
    F-contiguous, else C; non-contiguous input is made C-contiguous).

    Raises:
        ValueError: If ``norm`` is not ``None``/``'ortho'`` or ``order`` is
            not ``'A'``, ``'C'`` or ``'F'``.
    """
    if norm not in (None, 'ortho'):
        raise ValueError(
            'Invalid norm value %s, should be None or "ortho".' % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes)
    if not axes_sorted:
        return a

    a = _convert_dtype(a, value_type)

    if order == 'A':
        flags = a.flags
        if flags.f_contiguous:
            order = 'F'
        elif flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    # _cook_shape may have produced a different layout; restore the chosen
    # one.
    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif order == 'F':
        if not a.flags.f_contiguous:
            a = cupy.asfortranarray(a)

    return _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                      overwrite_x=overwrite_x, plan=plan, out=out)
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A',
          plan=None, overwrite_x=False, out=None):
    """Validate the arguments, lay out ``a`` and run the N-d FFT.

    With no axes to transform, ``a`` is returned unchanged for ``'C2C'``
    and ``IndexError`` is raised for every other ``value_type``.
    With ``order='A'`` the memory order follows the input (F if
    F-contiguous, else C; non-contiguous input is made C-contiguous).

    Raises:
        ValueError: If ``norm`` or ``order`` is invalid, or if any
            dimension of the prepared input is smaller than 1.
        IndexError: If there are no axes and ``value_type`` is not
            ``'C2C'``.
    """
    if norm not in (None, 'ortho'):
        raise ValueError(
            'Invalid norm value %s, should be None or "ortho".' % norm)

    axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if not axes_sorted:
        if value_type != 'C2C':
            raise IndexError('list index out of range')
        return a

    a = _convert_dtype(a, value_type)

    if order == 'A':
        flags = a.flags
        if flags.f_contiguous:
            order = 'F'
        elif flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    # Note: need to call _cook_shape prior to sorting the axes
    a = _cook_shape(a, s, axes, value_type, order=order)

    # Report the first non-positive dimension, if any.
    bad_length = next((n for n in a.shape if n < 1), None)
    if bad_length is not None:
        raise ValueError(
            'Invalid number of FFT data points (%d) specified.' % bad_length)

    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif order == 'F':
        if not a.flags.f_contiguous:
            a = cupy.asfortranarray(a)

    # _cook_shape tells us input shape only, and not output shape
    out_size = _get_fftn_out_size(a.shape, s, axes_sorted[-1], value_type)

    return _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
                      overwrite_x=overwrite_x, plan=plan, out=out,
                      out_size=out_size)
def test_get_multigpu(self, dtype):
    """``get()`` on an array created under device 1, called under device 0."""
    with cuda.Device(1):
        device_array = cupy.asfortranarray(
            testing.shaped_arange((2, 3), xp=cupy, dtype=dtype))
    with cuda.Device(0):
        host_array = device_array.get()
    reference = testing.shaped_arange((2, 3), xp=numpy, dtype=dtype)
    np_testing.assert_array_equal(host_array, reference)
def test_get_multigpu(self, dtype, order):
    """``get()`` on an array created under device 1, called under device 0."""
    with cuda.Device(1):
        device_array = cupy.asfortranarray(
            testing.shaped_arange((2, 3), cupy, dtype, order))
    with cuda.Device(0):
        host_array = device_array.get()
    reference = testing.shaped_arange((2, 3), numpy, dtype, order)
    np_testing.assert_array_equal(host_array, reference)
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A',
          plan=None, overwrite_x=False, out=None):
    """Validate the arguments, lay out ``a`` and run the N-d FFT.

    When ``axes`` is ``None`` the last ``len(s)`` axes are used (all axes
    if ``s`` is ``None`` as well). With ``order='A'`` the memory order
    follows the input (F if F-contiguous, else C; non-contiguous input is
    made C-contiguous).

    Raises:
        ValueError: If ``norm`` or ``order`` is invalid, or if ``s`` and
            ``axes`` have different lengths.
    """
    if norm not in (None, 'ortho'):
        raise ValueError(
            'Invalid norm value %s, should be None or "ortho".' % norm)

    a = _convert_dtype(a, value_type)

    if (s is not None) and (axes is not None) and len(s) != len(axes):
        raise ValueError('Shape and axes have different lengths.')

    if axes is None:
        n_axes = a.ndim if s is None else len(s)
        axes = six.moves.range(-n_axes, 0)
    axes = tuple(axes)

    if order == 'A':
        flags = a.flags
        if flags.f_contiguous:
            order = 'F'
        elif flags.c_contiguous:
            order = 'C'
        else:
            a = cupy.ascontiguousarray(a)
            order = 'C'
    elif order not in ['C', 'F']:
        raise ValueError('Unsupported order: {}'.format(order))

    a = _cook_shape(a, s, axes, value_type, order=order)

    if order == 'C':
        if not a.flags.c_contiguous:
            a = cupy.ascontiguousarray(a)
    elif order == 'F':
        if not a.flags.f_contiguous:
            a = cupy.asfortranarray(a)

    # sort the provided axes in ascending order
    non_negative_axes = np.mod(axes, a.ndim)
    axes = tuple(sorted(non_negative_axes))

    return _exec_fftn(a, direction, value_type, norm=norm, axes=axes,
                      overwrite_x=overwrite_x, plan=plan, out=out)
def test_overwrite_x_with_contiguous_view(self, dtype):
    """``fftn(..., overwrite_x=True)`` on a contiguous view reuses its buffer.

    Test case for: https://github.com/cupy/cupy/issues/3079
    """
    base_array = testing.shaped_random(self.shape, cupy, dtype)
    if self.data_order != 'C':
        # F-contiguous view
        base_array = cupy.asfortranarray(base_array)
        view = base_array[..., :base_array.shape[-1] // 2]
    else:
        # C-contiguous view
        view = base_array[:base_array.shape[0] // 2, ...]
    view_ptr = view.data.ptr
    out = cupyx.scipy.fftpack.fftn(view, overwrite_x=True)
    assert out.data.ptr == view_ptr
def my_conv2(S1, sig, varargin=None):
    """Filter ``S1`` with a normalized Gaussian along the chosen axes.

    Args:
        S1: The array to be filtered.
        sig: Either a scalar or a sequence of scalars, one for each axis to
            be filtered. ``S1`` is returned unchanged when ``sig <= .25``.
        varargin: The axes to filter; when omitted the axis index ``1`` is
            used.

    Returns:
        The filtered array.
    """
    # NOTE(review): ``sig <= .25`` looks like it only works for a scalar
    # ``sig`` even though a sequence is documented -- confirm intent.
    if sig <= .25:
        return S1

    axes = _make_vect(1 if varargin is None else varargin)

    if _is_vect(axes) and _is_vect(sig):
        sigmas = sig
    else:
        # Same sigma for every requested axis.
        sigmas = np.tile(sig, len(axes))

    for sigma, axis in zip(sigmas, axes):
        n_dims = S1.ndim
        trailing = list(range(axis + 1, n_dims))

        # Move the filtered axis to the front and flatten the rest.
        S1 = cp.transpose(S1, [axis] + list(range(0, axis)) + trailing)
        moved_shape = S1.shape
        S1 = cp.reshape(S1, (S1.shape[0], -1), order='F')
        n_rows, n_cols = S1.shape

        # Gaussian kernel of half-width tmax, normalized to unit sum.
        tmax = ceil(4 * sigma)
        dt = np.arange(-tmax, tmax + 1)
        gaus = np.exp(-dt**2 / (2 * sigma**2))
        gaus = gaus[:, np.newaxis] / np.sum(gaus)

        # Filtering a column of ones gives the per-row normalization; the
        # first tmax output rows are dropped in both filter passes.
        ones_padded = cp.concatenate((cp.ones(n_rows), cp.zeros(tmax)))
        cNorm = lfilter(gaus, 1, ones_padded[:, np.newaxis], axis=0)
        cNorm = cNorm[tmax:, :]

        data_padded = cp.concatenate(
            (S1, cp.zeros((tmax, n_cols), order='F')), axis=0)
        S1 = lfilter(gaus, 1, cp.asfortranarray(data_padded), axis=0)
        S1 = S1[tmax:, :]

        S1 = S1.reshape(moved_shape, order='F')
        S1 = S1 / cNorm

        # Put the filtered axis back where it came from.
        S1 = cp.transpose(S1, list(range(1, axis + 1)) + [0] + trailing)

    return S1
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Dispatch, in order: scalar, CSR matrix, CSC matrix, any other sparse
    matrix (converted with ``tocsr()``), dense array of 0, 1 or 2
    dimensions. For 1-D dense operands ``csrmvEx`` (or the CUB kernel when
    enabled) is used if the matrix has nonzeros and the operands are
    aligned; otherwise ``csrmv`` is used.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        return cusparse.csrgemm(self, other)

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        return cusparse.csrgemm(self, other.T, transb=True)

    if base.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    if other.ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if other.ndim == 2:
        self.sum_duplicates()
        return cusparse.csrmm2(self, cupy.asfortranarray(other))

    if other.ndim != 1:
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()
    vector = cupy.asfortranarray(other)
    # csrmvEx does not work if nnz == 0
    if not (self.nnz > 0 and cusparse.csrmvExIsAligned(self, vector)):
        return cusparse.csrmv(self, vector)
    if cupy.cuda.cub_enabled and vector.flags.c_contiguous:
        n_rows, n_cols = self.shape[0], self.shape[1]
        return device_csrmv(
            n_rows, n_cols, self.nnz, self.data, self.indptr,
            self.indices, vector)
    return cusparse.csrmvEx(self, vector)
def test_empty_like_reshape_contiguity2_cupy_only(self, dtype, order):
    """Contiguity of ``empty_like`` with ``shape`` for an F-ordered source."""
    source = cupy.asfortranarray(
        testing.shaped_arange((2, 3, 4), cupy, dtype))
    result = cupy.empty_like(source, order=order, shape=self.shape)
    result.fill(0)
    reference = cupy.empty(self.shape)
    reference.fill(0)

    if numpy.isscalar(self.shape):
        shape = (self.shape,)
    else:
        shape = self.shape

    # C order is expected when requested explicitly, or when 'K'/None is
    # given and the new shape has a different number of dimensions.
    keeps_layout = order in ['k', 'K', None]
    expect_c = order in ['c', 'C'] or (
        keeps_layout and len(shape) != source.ndim)
    if expect_c:
        assert result.flags.c_contiguous
    else:
        assert result.flags.f_contiguous
    testing.assert_array_equal(result, reference)
def _solve(a, b):
    """Solve ``a x = b`` through a QR decomposition.

    Both operands are converted with ``cupy.asfortranarray`` and then
    overwritten by the cuSOLVER/cuBLAS routines; the array holding the
    solution is returned.

    Args:
        a (cupy.ndarray): Coefficient matrix of dtype ``'f'`` or ``'d'``.
        b (cupy.ndarray): Right-hand side, 1-D or 2-D.

    Returns:
        cupy.ndarray: The solution, stored in the (converted) ``b``.

    Raises:
        NotImplementedError: If the dtype of ``a`` is neither ``'f'`` nor
            ``'d'``. Previously any other dtype was silently run through
            the double-precision routines.
    """
    a = cupy.asfortranarray(a)
    b = cupy.asfortranarray(b)
    dtype = a.dtype
    m, k = (b.size, 1) if b.ndim == 1 else b.shape
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf = cusolver.sgeqrf
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        ormqr = cusolver.sormqr
        trsm = cublas.strsm
    elif dtype == 'd':
        geqrf = cusolver.dgeqrf
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        ormqr = cusolver.dormqr
        trsm = cublas.dtrsm
    else:
        # Passing buffers of another dtype to the d* routines would
        # misinterpret the memory, so fail loudly instead.
        raise NotImplementedError(dtype)

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(
        cusolver_handle, m, m, a.data.ptr, m, tau.data.ptr,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 2. ormqr (Q^T * B)
    ormqr(
        cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T,
        m, k, m, a.data.ptr, m, tau.data.ptr, b.data.ptr, m,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(
        cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
        cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT,
        m, k, 1, a.data.ptr, m, b.data.ptr, m)
    return b
def test_fftn_orders(self, dtype, enable_nd):
    """``fftn`` output contiguity matches the input when an nd plan is used."""
    for order in ['C', 'F']:
        data = testing.shaped_random(self.shape, cupy, dtype)
        if order == 'F':
            data = cupy.asfortranarray(data)
        out = cupy.fft.fftn(data, s=self.s, axes=self.axes)

        plan_type = _default_plan_type(data, s=self.s, axes=self.axes)
        if plan_type != 'nd':
            # 1d planning case doesn't guarantee preserved contiguity
            continue

        # nd plans have output with contiguity matching the input
        in_flags, out_flags = data.flags, out.flags
        self.assertEqual(out_flags.c_contiguous, in_flags.c_contiguous)
        self.assertEqual(out_flags.f_contiguous, in_flags.f_contiguous)
def test_noncontiguous_view(self, dtype):
    """A plan built from a copy gives the same result on a strided view."""
    data = testing.shaped_random(self.shape, cupy, dtype)
    every_other = slice(None, None, 2)
    if self.data_order == 'F':
        data = cupy.asfortranarray(data)
        selector = (Ellipsis, every_other)
    else:
        selector = (every_other, Ellipsis)

    # transform a non-contiguous view without pre-planning
    strided = data[selector]
    reference = cupyx.scipy.fftpack.fftn(strided)

    # create plan and then apply it to a non-contiguous view
    plan = cupyx.scipy.fftpack.get_fft_plan(strided.copy())
    with plan:
        planned = cupyx.scipy.fftpack.fftn(strided)
    testing.assert_allclose(reference, planned)
def gpu_ap_approximate(adj, features, alpha, k, fetch):
    """Iterate ``adj @ x + features`` on the GPU, one column tile at a time.

    For every tile the update is applied ``k - 1`` times and the result is
    scaled by ``alpha / (alpha + 1)``.

    Args:
        adj: Matrix converted to ``cp.sparse.csr_matrix``.
        features: Dense or scipy-sparse feature matrix; cast to float32.
        alpha: Scaling parameter.
        k: Iteration count (``k - 1`` multiplications are performed).
        fetch: Row selector for the returned features, or ``None`` for all
            rows.

    Returns:
        tuple: ``(new_features, smooth_time)`` where ``smooth_time`` is the
        accumulated wall-clock time of the GPU computation in seconds.
    """
    features = features.astype(np.float32)
    new_features = features if fetch is None else features[fetch]
    if sp.issparse(new_features):
        new_features = new_features.todense()

    smooth_time = 0
    adj = cp.sparse.csr_matrix(adj)
    adj.sum_duplicates()

    # NOTE(review): presumably sized so that two float32 tiles fit in
    # 1 GiB -- confirm.
    tile_width = 1024**3 // 4 // 2 // features.shape[0]
    n_cols = features.shape[1]

    def _to_gpu(block):
        # transfer data to GPU
        if sp.issparse(features):
            return cp.sparse.csr_matrix(block).todense()
        return cp.asarray(block)

    for low in range(0, n_cols, tile_width):
        high = min(n_cols, low + tile_width)

        new_features_tile = _to_gpu(features[:, low:high])
        features_tile = _to_gpu(features[:, low:high])
        new_features_tile = cp.asfortranarray(new_features_tile)
        new_features_tile.device.synchronize()

        # calculate
        begin = time.time()
        for _ in range(k - 1):
            # new_feature = adj.dot(new_feature) + features
            # NOTE(review): the output buffer aliases the dense input of
            # csrmm2 -- verify that this in-place use is safe.
            new_features_tile = cp.cusparse.csrmm2(
                adj, new_features_tile, new_features_tile)
            new_features_tile += features_tile
        new_features_tile *= alpha / (alpha + 1)
        new_features_tile.device.synchronize()
        smooth_time += time.time() - begin

        # fetch
        if fetch is None:
            result_tile = new_features_tile.get()
        else:
            result_tile = new_features_tile[fetch].get()
        new_features[:, low:high] = result_tile

    return new_features, smooth_time
def test_contiguous_view(self, dtype):
    """A plan built from a contiguous view gives the unplanned result.

    Fortran-ordered case tests: https://github.com/cupy/cupy/issues/3079
    """
    data = testing.shaped_random(self.shape, cupy, dtype)
    if self.data_order == 'F':
        data = cupy.asfortranarray(data)
        selector = (Ellipsis, 0)
    else:
        selector = (0, Ellipsis)

    # transform a contiguous view without pre-planning
    view = data[selector]
    reference = cupyx.scipy.fftpack.fftn(view)

    # create plan and then apply it to a contiguous view
    plan = cupyx.scipy.fftpack.get_fft_plan(view)
    with plan:
        planned = cupyx.scipy.fftpack.fftn(view)
    testing.assert_allclose(reference, planned)
def gpu_taubin_smoothing(step_transformor, features, repeat, fetch):
    """Apply ``step_transformor`` to ``features`` ``repeat`` times on the GPU.

    The feature matrix is processed one column tile at a time.

    Args:
        step_transformor: Matrix converted to ``cp.sparse.csr_matrix``.
        features: Dense or scipy-sparse feature matrix.
        repeat: Number of multiplications applied to each tile.
        fetch: Row selector for the returned features, or ``None`` for all
            rows.

    Returns:
        tuple: ``(new_features, smooth_time)`` where ``smooth_time`` is the
        accumulated wall-clock time of the GPU computation in seconds.
    """
    # TODO: transfer sparse features to GPU
    # TODO: only fetch necessary data
    smooth_time = 0
    step_transformor = cp.sparse.csr_matrix(step_transformor)
    step_transformor.sum_duplicates()

    # NOTE(review): presumably sized so that four float32 tiles fit in
    # 1 GiB -- confirm.
    tile_width = 1024**3 // 4 // 4 // features.shape[0]

    # initialize new_features
    # NOTE(review): with ``fetch is None`` and dense input this is the
    # caller's array itself, so the results are written into ``features``.
    new_features = features if fetch is None else features[fetch]
    if sp.issparse(new_features):
        new_features = new_features.todense()

    n_cols = features.shape[1]
    for low in range(0, n_cols, tile_width):
        high = min(n_cols, low + tile_width)

        # transfer data to GPU
        if sp.issparse(features):
            tile = cp.sparse.csr_matrix(features[:, low:high]).todense()
        else:
            tile = cp.asarray(features[:, low:high])
        tile = cp.asfortranarray(tile)
        tile.device.synchronize()

        # calculate
        begin = time.time()
        for _ in range(repeat):
            # tile = step_transformor.dot(tile)
            # NOTE(review): the output buffer aliases the dense input of
            # csrmm2 -- verify that this in-place use is safe.
            tile = cp.cusparse.csrmm2(step_transformor, tile, tile)
        tile.device.synchronize()
        smooth_time += time.time() - begin

        # fetch
        if fetch is None:
            result_tile = tile.get()
        else:
            result_tile = tile[fetch].get()
        new_features[:, low:high] = result_tile

    return new_features, smooth_time
def dense2csc(x):
    """Converts a dense matrix in CSC format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csc_matrix: A converted matrix.

    Raises:
        RuntimeError: If ``check_availability('dense2csc')`` is false.

    """
    if not check_availability('dense2csc'):
        raise RuntimeError('dense2csc is not available.')

    assert x.ndim == 2
    x = cupy.asfortranarray(x)
    nnz = numpy.empty((), dtype='i')  # host-side scalar output
    handle = device.get_cusparse_handle()
    m, n = x.shape

    descr = MatDescriptor.create()
    # Counting along CUSPARSE_DIRECTION_COLUMN produces one entry per
    # column, so the buffer needs n entries. It was sized with m before,
    # which is too small whenever n > m.
    nnz_per_col = cupy.empty(n, 'i')
    _call_cusparse(
        'nnz', x.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_COLUMN, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr, nnz.ctypes.data)

    nnz = int(nnz)
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(n + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    _call_cusparse(
        'dense2csc', x.dtype,
        handle, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr,
        data.data.ptr, indices.data.ptr, indptr.data.ptr)
    # Note that a descriptor is recreated
    csc = cupyx.scipy.sparse.csc_matrix((data, indices, indptr), shape=x.shape)
    csc._has_canonical_format = True
    return csc
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    For the cuSPARSE calls this matrix is passed either as its transpose
    (``self.T`` with ``transa=True``) or converted with ``tocsr()``.
    Routines that need the transposed form are skipped on HIP where the
    source marks them as buggy.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        NotImplementedError: If no suitable routine is available.
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if cupyx.scipy.sparse.isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            lhs = self.T
            return cusparse.csrgemm(lhs, other, transa=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            lhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, other)
        raise NotImplementedError

    if isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            lhs = self.T
            rhs = other.T
            return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            rhs = other.tocsr()
            lhs.sum_duplicates()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, rhs)
        raise NotImplementedError

    if cupyx.scipy.sparse.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    if other.ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)
    if other.ndim not in (1, 2):
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()
    # Each entry is (routine name, skip on HIP).
    # trans=True is buggy as of ROCm 4.2.0
    # (I got HIPSPARSE_STATUS_INTERNAL_ERROR...)
    if other.ndim == 1:
        candidates = (('csrmv', True), ('spmv', True))
    else:
        candidates = (('csrmm2', True), ('spmm', False))
    for routine_name, skip_on_hip in candidates:
        if (cusparse.check_availability(routine_name)
                and not (skip_on_hip and runtime.is_hip)):
            routine = getattr(cusparse, routine_name)
            break
    else:
        raise NotImplementedError
    return routine(self.T, cupy.asfortranarray(other), transa=True)
def test_astype_type_f_contiguous_no_copy(self, dtype, order):
    """``astype(..., copy=False)`` returns the same F-contiguous array."""
    source = cupy.asfortranarray(
        testing.shaped_arange((2, 3, 4), cupy, dtype))
    result = source.astype(dtype, order=order, copy=False)
    self.assertTrue(result is source)
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Dispatch, in order: scalar, CSR matrix, CSC matrix, any other sparse
    matrix (converted with ``tocsr()``), dense array of 0, 1 or 2
    dimensions. The cuSPARSE routine is picked with
    ``cusparse.check_availability``; for aligned 1-D operands the CUB
    kernel is used when that accelerator is enabled.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        NotImplementedError: If no suitable cuSPARSE routine is available.
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm2'):
            return cusparse.csrgemm2(self, other)
        if cusparse.check_availability('csrgemm'):
            return cusparse.csrgemm(self, other)
        raise NotImplementedError

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            return cusparse.csrgemm(self, other.T, transb=True)
        if cusparse.check_availability('csrgemm2'):
            rhs = other.tocsr()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(self, rhs)
        raise NotImplementedError

    if base.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    if other.ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)
    if other.ndim not in (1, 2):
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()

    if other.ndim == 1:
        vector = cupy.asfortranarray(other)
        # csrmvEx does not work if nnz == 0
        if self.nnz > 0 and cusparse.csrmvExIsAligned(self, vector):
            use_cub = any(
                accelerator == _accelerator.ACCELERATOR_CUB
                and vector.flags.c_contiguous
                for accelerator in _accelerator.get_routine_accelerators())
            if use_cub:
                return cub.device_csrmv(
                    self.shape[0], self.shape[1], self.nnz,
                    self.data, self.indptr, self.indices, vector)
            return cusparse.csrmvEx(self, vector)
        for routine_name in ('csrmv', 'spmv'):
            if cusparse.check_availability(routine_name):
                return getattr(cusparse, routine_name)(self, vector)
        raise NotImplementedError

    for routine_name in ('csrmm2', 'spmm'):
        if cusparse.check_availability(routine_name):
            matmat = getattr(cusparse, routine_name)
            break
    else:
        raise NotImplementedError
    return matmat(self, cupy.asfortranarray(other))
def gesv(a, b):
    """Solve a linear matrix equation using cusolverDn<t>getr[fs]().

    Computes the solution to a system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(M)`` or ``(M, K)``.

    Returns:
        cupy.ndarray: The matrix with dimension ``(M)`` or ``(M, K)``.

    Raises:
        ValueError: If the operand dimensions or shapes are inconsistent,
            or if the promoted dtype is not one of ``f``, ``d``, ``F``,
            ``D``.
    """
    if a.ndim != 2:
        raise ValueError('a.ndim must be 2 (actual: {})'.format(a.ndim))
    if b.ndim not in (1, 2):
        raise ValueError('b.ndim must be 1 or 2 (actual: {})'.format(b.ndim))
    if a.shape[0] != a.shape[1]:
        raise ValueError('a must be a square matrix.')
    if a.shape[0] != b.shape[0]:
        raise ValueError('shape mismatch (a: {}, b: {}).'.format(
            a.shape, b.shape))

    dtype = numpy.promote_types(a.dtype.char, 'f')
    if dtype == 'f':
        t = 's'
    elif dtype == 'd':
        t = 'd'
    elif dtype == 'F':
        t = 'c'
    elif dtype == 'D':
        t = 'z'
    else:
        raise ValueError('unsupported dtype (actual:{})'.format(a.dtype))

    helper = getattr(cusolver, t + 'getrf_bufferSize')
    getrf = getattr(cusolver, t + 'getrf')
    getrs = getattr(cusolver, t + 'getrs')

    n = b.shape[0]
    nrhs = b.shape[1] if b.ndim == 2 else 1

    a_data_ptr = a.data.ptr
    b_data_ptr = b.data.ptr
    a = cupy.asfortranarray(a, dtype=dtype)
    b = cupy.asfortranarray(b, dtype=dtype)
    # The solver overwrites its operands, so work on private copies when
    # the conversion above returned the caller's own buffer. The copy must
    # keep the Fortran layout: ``ndarray.copy()`` defaults to C order,
    # which would hand the routines a transposed ``a`` and a scrambled
    # 2-D ``b``.
    if a.data.ptr == a_data_ptr:
        a = a.copy(order='F')
    if b.data.ptr == b_data_ptr:
        b = b.copy(order='F')

    handle = device.get_cusolver_handle()
    dipiv = cupy.empty(n, dtype=numpy.int32)
    dinfo = cupy.empty(1, dtype=numpy.int32)
    lwork = helper(handle, n, n, a.data.ptr, n)
    dwork = cupy.empty(lwork, dtype=a.dtype)

    # LU factorization (A = L * U)
    getrf(handle, n, n, a.data.ptr, n, dwork.data.ptr, dipiv.data.ptr,
          dinfo.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrf, dinfo)

    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a.data.ptr, n,
          dipiv.data.ptr, b.data.ptr, n, dinfo.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrs, dinfo)

    return b
def create_rand_integers():
    """Return a Fortran-ordered ``(500, 20)`` float64 array of random ints.

    The values are drawn with ``cp.random.randint(30, ...)``.
    """
    integers = cp.random.randint(30, size=(500, 20))
    return cp.asfortranarray(integers.astype(cp.float64))
# Fragment of a larger script: ``mod``, ``vertex``, ``partition``, ``result``,
# ``XYZ_C`` and ``louvain_starttime`` are defined before this point.
print('modularity', mod)
# Convert the vertex and partition columns to cupy arrays through DLPack.
vertex = cp.fromDlpack(vertex.to_dlpack())
partition = cp.fromDlpack(partition.to_dlpack())
# Reshape both to one entry per row of XYZ_C.
vertex = cp.reshape(vertex, XYZ_C.shape[0])
labelRE = cp.reshape(partition, XYZ_C.shape[0])
# Reorder the labels by ascending value of ``vertex``.
index = cp.argsort(vertex)
vertex = cp.take_along_axis(vertex, index, axis=0)
labelRE = cp.take_along_axis(labelRE, index, axis=0)
print(result)
print(vertex)
print(labelRE)
print(index)
# Report the elapsed time since ``louvain_starttime``.
louvain_endtime = datetime.datetime.now()
print(louvain_endtime - louvain_starttime)
print(labelRE)
# Hand the labels back through DLPack (``cd`` is presumably cudf -- confirm)
# and write them to CSV.
labelRE = cp.asfortranarray(labelRE)
labelRE = cd.from_dlpack(labelRE.toDlpack())
#labelRE = cd.DataFrame(labelRE)
print(labelRE)
df = cd.DataFrame({'label': labelRE})
df.to_csv('HW_AI/HW_Final/gpu.csv')
# The string literal below holds disabled plotting code; its closing quotes
# are outside this fragment.
''' plt.figure(1) ax = plt.axes(projection='3d') z = XYZ_C[:,2] x = XYZ_C[:,0] y = XYZ_C[:,1] c = labelRE ax.scatter(x, y, z, c = c, cmap = plt.get_cmap('jet')) plt.title('Cluster result by modularity')
def extractTemplatesfromSnippets(proc=None, probe=None, params=None,
                                 Nbatch=None, nPCs=None):
    """Extract template prototypes and PC waveforms from spike snippets.

    This function is very similar to extractPCfromSnippets. It outputs not
    just the PC waveforms, but also the template "prototype", basically
    k-means clustering of 1D waveforms.

    Args:
        proc: Preprocessed data, read through ``proc.flat``.
        probe: Object providing ``Nchan``.
        params: Object providing ``NT``, ``nskip``, ``nPCs``, ``nt0min`` and
            ``scaleproc``.
        Nbatch (int): Number of batches.
        nPCs (int): Number of templates/PCs; falls back to ``params.nPCs``.

    Returns:
        tuple: ``(wTEMP, wPCA)``.
    """
    NT = params.NT
    # skip every this many batches
    nskip = params.nskip
    nPCs = nPCs or params.nPCs
    nt0min = params.nt0min
    Nchan = probe.Nchan
    batchstart = np.arange(0, NT * Nbatch + 1, NT).astype(np.int64)

    def _unit_columns(waveforms):
        # Scale every column to unit Euclidean norm.
        return waveforms / cp.sum(waveforms**2, axis=0)**.5

    # collect the 1D spike snippets of every visited batch
    n_snippets = 0
    snippets = []
    for ibatch in tqdm(range(0, Nbatch, nskip), desc="Extracting templates"):
        offset = Nchan * batchstart[ibatch]
        dat = proc.flat[offset:offset + NT * Nchan].reshape(
            (-1, Nchan), order='F')

        # move data to GPU and scale it back to unit variance
        dataRAW = cp.asarray(dat, dtype=np.float32) / params.scaleproc

        # find isolated spikes from each batch
        row, col, _ = isolated_peaks_new(dataRAW, params)

        # for each peak, get the voltage snippet from that channel
        batch_snippets = get_SpikeSample(dataRAW, row, col, params)
        snippets.append(batch_snippets)

        n_snippets += batch_snippets.shape[1]
        if n_snippets > 1e5:
            break

    dd = cp.asfortranarray(
        cp.concatenate(snippets, axis=1).astype(np.float32))

    # initialize the template clustering with random waveforms
    seeds = np.random.permutation(dd.shape[1])[:nPCs]
    wTEMP = _unit_columns(dd[:, seeds])  # normalize them

    for _ in range(10):
        # at each iteration, assign the waveform to its most correlated
        # cluster
        cc = cp.dot(wTEMP.T, dd)
        imax = cp.argmax(cc, axis=0)
        amax = cc[imax, np.arange(cc.shape[1])]
        for j in range(nPCs):
            # weighted average to get new cluster means
            members = imax == j
            wTEMP[:, j] = cp.dot(dd[:, members], amax[members].T)
        wTEMP = _unit_columns(wTEMP)  # unit normalize

    # the PCs are just the left singular vectors of the waveforms
    U, _, _ = svdecon(dd)
    # take as many as needed
    wPCA = U[:, :nPCs]

    # adjust the arbitrary sign of the first PC so its negativity is downward
    wPCA[:, 0] = -wPCA[:, 0] * cp.sign(wPCA[nt0min, 0])

    return wTEMP, wPCA
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Dispatch, in order: scalar, CSR matrix, CSC matrix, any other sparse
    matrix (converted with ``tocsr()``), dense array of 0, 1 or 2
    dimensions. The cuSPARSE routine is picked with
    ``cusparse.check_availability``; for 1-D operands the CUB kernel is
    tried first when it is enabled and considered safe.

    Returns:
        The product, or ``NotImplemented`` when ``other`` is none of the
        supported kinds.

    Raises:
        NotImplementedError: If no suitable routine is available.
        ValueError: If ``other`` is dense with more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm2'):
            return cusparse.csrgemm2(self, other)
        if cusparse.check_availability('csrgemm'):
            return cusparse.csrgemm(self, other)
        raise NotImplementedError

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            return cusparse.csrgemm(self, other.T, transb=True)
        if cusparse.check_availability('csrgemm2'):
            rhs = other.tocsr()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(self, rhs)
        raise NotImplementedError

    if base.isspmatrix(other):
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    if other.ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)
    if other.ndim not in (1, 2):
        raise ValueError('could not interpret dimensions')

    self.sum_duplicates()

    if other.ndim == 2:
        for routine_name in ('csrmm2', 'spmm'):
            if cusparse.check_availability(routine_name):
                matmat = getattr(cusparse, routine_name)
                break
        else:
            raise NotImplementedError
        return matmat(self, cupy.asfortranarray(other))

    vector = cupy.asfortranarray(other)
    # need extra padding to ensure not stepping on the CUB bug,
    # see cupy/cupy#3679 for discussion
    indptr = self.indptr
    has_padding = (
        indptr.data.mem.size > indptr.size * indptr.dtype.itemsize)
    # CUB spmv is buggy since CUDA 11.0, see
    # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637
    before_cuda_11 = cub._get_cuda_build_version() < 11000
    is_cub_safe = has_padding & before_cuda_11

    for accelerator in _accelerator.get_routine_accelerators():
        if accelerator != _accelerator.ACCELERATOR_CUB:
            continue
        if (not runtime.is_hip and is_cub_safe
                and vector.flags.c_contiguous):
            return cub.device_csrmv(
                self.shape[0], self.shape[1], self.nnz,
                self.data, self.indptr, self.indices, vector)

    if (cusparse.check_availability('csrmvEx') and self.nnz > 0 and
            cusparse.csrmvExIsAligned(self, vector)):
        # csrmvEx does not work if nnz == 0
        matvec = cusparse.csrmvEx
    elif cusparse.check_availability('csrmv'):
        matvec = cusparse.csrmv
    elif cusparse.check_availability('spmv'):
        matvec = cusparse.spmv
    else:
        raise NotImplementedError
    return matvec(self, vector)