def dot(A, B, opa='n', opb='n', C=None, Cstart=None, scale=1.0,
        Cscale=0.0, handle=None):
    """
    Multiplication of two matrices A and B in PitchArray format.

    If C is specified, use the memory in C. The specified C must have
    the same leading dimension as that of the result, and its other
    dimension must be greater than or equal to that of the result.

    Parameters
    ----------
    A: parray.PitchArray
    B: parray.PitchArray
    opa: str
        operation on A
        'n' or 'N': use A itself
        't' or 'T': use transpose of A
        'c' or 'C': use conjugate transpose of A
    opb: str
        operation on B
        'n' or 'N': use B itself
        't' or 'T': use transpose of B
        'c' or 'C': use conjugate transpose of B
    C: parray.PitchArray
        if specified, the result will be stored in C
    Cstart: int
        the offset start of the C array
    scale: float
        scaling factor for A*B, see Cscale
    Cscale: float
        scaling factor for C
        the result will be C = C*Cscale + scale*A*B

    Notes
    -----
    Works only for CUDA version > 4.0, where handle is introduced.
    Will NOT work in the complex case when A and B share overlapping
    memory, but should work if A == B.
    """
    if A.dtype != B.dtype:
        raise TypeError("matrix multiplication must have same dtype")

    if (len(A.shape) != 2) | (len(B.shape) != 2):
        raise TypeError("A, B must both be matrices")

    if opa in ['n', 'N']:
        m, n = A.shape
    elif opa in ['t', 'T', 'c', 'C']:
        n, m = A.shape
    else:
        raise ValueError("unknown value assigned to opa")

    if opb in ['n', 'N']:
        k, l = B.shape
    elif opb in ['t', 'T', 'c', 'C']:
        l, k = B.shape
    else:
        raise ValueError("unknown value assigned to opb")

    if (k != n) | (0 in [m, n, l]):
        raise ValueError("matrix dimension mismatch, "
                         "(%d,%d) with (%d,%d)" % (m, n, k, l))

    dtype = A.dtype
    if dtype in [np.float32, np.float64]:
        if opb in ['c', 'C']:
            opb = 't'
        if opa in ['c', 'C']:
            opa = 't'

    scale = dtype.type(scale)
    Cscale = dtype.type(Cscale)

    if dtype == np.float64:
        tp = 'cublas.cublasD'
        complex_type = False
    elif dtype == np.complex128:
        tp = 'cublas.cublasZ'
        complex_type = True
    elif dtype == np.float32:
        tp = 'cublas.cublasS'
        complex_type = False
    elif dtype == np.complex64:
        tp = 'cublas.cublasC'
        complex_type = True

    if C is None:
        C = parray.empty((m, l), dtype)
        Cstart = 0
        Cempty = True
        Cscale = dtype.type(0)
    else:
        Cempty = False
        if Cstart is None:
            Cstart = 0
        if C.shape[1] != l:
            raise AttributeError("shape of the provided result array " +
                                 C.shape.__str__() +
                                 " does not match intended result " +
                                 (m, l).__str__())
        if C.shape[0] < m + Cstart:
            raise AttributeError("shape of the provided result array " +
                                 C.shape.__str__() +
                                 " does not match intended result " +
                                 (m, l).__str__())
        if C.dtype != dtype:
            raise TypeError("Result array C provided must have "
                            "the same dtype as inputs")

    conjA = False
    conjB = False
    conjC = False
    sameflag = (A == B)

    itemsize = C.dtype.itemsize
    handlestr = "handle.handle"

    if m == 1:
        if n == 1:
            alpha = A.get()[0, 0]
            if opa in ['c', 'C']:
                alpha = np.conj(alpha)
            C *= Cscale
            if opb in ['c', 'C']:
                func = (tp + "axpy(handle.handle, l, alpha*scale, " +
                        "parray.conj(B).gpudata, 1, " +
                        "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                func = (tp + "axpy(handle.handle, l, alpha*scale, " +
                        "B.gpudata, 1, " +
                        "int(C.gpudata)+Cstart*itemsize, 1)")
        else:
            if l > 1:
                alpha = scale
                beta = Cscale
                if opa in ['c', 'C']:
                    A.conj()
                    conjA = True
                func = (tp + "gemv(handle.handle, '" + opb + "', B.shape[1], " +
                        "B.shape[0], alpha, B.gpudata, B.ld, A.gpudata, " +
                        "1, beta, int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
            else:
                if opa in ['c', 'C']:
                    if opb in ['c', 'C']:
                        func = ("C.set(np.array(scale*" + tp +
                                "dotu(handle.handle, n, A.gpudata, " +
                                "1, B.gpudata, 1)" +
                                ").conj()+C.get()*Cscale)")
                    else:
                        func = ("C.set(np.array(scale*" + tp +
                                "dotc(handle.handle, n, A.gpudata, " +
                                "1, B.gpudata, 1)) + C.get()*Cscale)")
                elif opb in ['c', 'C']:
                    func = ("C.set(np.array(scale*" + tp +
                            "dotc(handle.handle, n, B.gpudata, 1, " +
                            "A.gpudata, 1)) + C.get()*Cscale)")
                else:
                    if complex_type:
                        func = ("C.set(np.array(scale*" + tp +
                                "dotu(handle.handle, n, A.gpudata, 1, " +
                                "B.gpudata, 1)) + C.get()*Cscale)")
                    else:
                        func = ("C.set(np.array(scale*" + tp +
                                "dot(handle.handle, n, A.gpudata, 1, " +
                                "B.gpudata, 1)) + C.get()*Cscale)")
    else:  # m != 1
        if n == 1:
            if l == 1:
                alpha = B.get()[0, 0]
                if opb in ['c', 'C']:
                    alpha = np.conj(alpha)
                C *= Cscale
                if opa in ['c', 'C']:
                    func = (tp + "axpy(handle.handle, m, alpha*scale, " +
                            "parray.conj(A).gpudata, 1, " +
                            "int(C.gpudata)+Cstart*itemsize, 1)")
                else:
                    func = (tp + "axpy(handle.handle, m, alpha*scale, " +
                            "A.gpudata, 1, " +
                            "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                if Cempty:
                    C.fill(0)
                else:
                    C *= Cscale
                if opa in ['c', 'C']:
                    if opb in ['c', 'C']:
                        B.conj()
                        conjB = True
                        func = (tp + "gerc(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, " +
                                "C.ld)")
                    else:
                        func = (tp + "gerc(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, " +
                                "C.ld)")
                elif opb in ['c', 'C']:
                    if sameflag:
                        B.conj()
                        conjB = True
                        func = (tp + "gerc(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                    else:
                        B.conj()
                        conjB = True
                        func = (tp + "geru(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                else:
                    if complex_type:
                        func = (tp + "geru(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                    else:
                        func = (tp + "ger(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
        else:
            if l == 1:
                if opb in ['c', 'C']:
                    if opa in ['c', 'C']:
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], " +
                                "A.shape[0], scale, A.gpudata, A.ld, " +
                                "B.gpudata, 1, Cscale, int(C.gpudata) + " +
                                "Cstart * itemsize * C.ld, 1)")
                    else:
                        B.conj()
                        conjB = True
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't'
                        func = (tp + "gemv(handle.handle, '" + opa + "', " +
                                "A.shape[1], A.shape[0], scale, A.gpudata, " +
                                "A.ld, B.gpudata, 1, Cscale, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
                else:
                    if opa in ['c', 'C']:
                        B.conj()
                        conjB = True
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], " +
                                "A.shape[0], scale, A.gpudata, A.ld, " +
                                "B.gpudata, 1, Cscale, int(C.gpudata) + " +
                                "Cstart * itemsize * C.ld, 1)")
                    else:
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't'
                        func = (tp + "gemv(handle.handle, '" + opa + "', " +
                                "A.shape[1], A.shape[0], scale, A.gpudata, " +
                                "A.ld, B.gpudata, 1, Cscale, int(C.gpudata) " +
                                "+ Cstart * itemsize * C.ld, 1)")
            else:
                func = (tp + "gemm(handle.handle, '" + opb + "', '" + opa +
                        "', " +
                        "l, m, k, scale, B.gpudata, B.ld, A.gpudata, A.ld, " +
                        "Cscale, int(C.gpudata) + " +
                        "Cstart * itemsize * C.ld, C.ld)")

    if handle is None:
        handle = cublashandle()

    eval(func)

    if conjC:
        C.conj()
    if conjA:
        A.conj()
    if conjB:
        B.conj()
    return C

def eig_sym(G, compute_z=True, uplo='U'):
    """
    Compute the eigenvalue decomposition of a symmetric or Hermitian
    matrix G:  G = V D V^{*}

    Parameters
    ----------
    G: PitchArray, GPUArray or numpy.ndarray
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_z: bool
        whether to return eigenvectors
    uplo: str
        'U' or 'u': assumes the entries of G are stored in the upper
        triangular part; the lower off-diagonal triangular part is not
        referenced
        'L' or 'l': assumes the entries of G are stored in the lower
        triangular part; the upper off-diagonal triangular part is not
        referenced

    Returns
    -------
    D: PitchArray
        a row vector containing all eigenvalues in ascending order
    V: PitchArray
        if compute_z, the jth column of V contains the orthonormal
        eigenvector associated with the jth eigenvalue

    Examples
    --------
    D = eig_sym(G, compute_z=False)
    D, V = eig_sym(G, compute_z=True)
    """
    if cula._libcula_toolkit != 'premium':
        raise ValueError("eigenvalue decomposition is only supported "
                         "in premium version of CULA")

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    if len(A.shape) != 2:
        raise TypeError("eig only works on 2D matrix")

    if A.shape[0] != A.shape[1]:
        raise ValueError("G must be square matrix")

    if uplo in ['u', 'U']:
        uplo = 'L'
    elif uplo in ['l', 'L']:
        uplo = 'U'
    else:
        raise ValueError("uplo must be 'U' or 'L'")

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        eig_func = cula.culaDeviceCheev
    elif A.dtype == np.float32:
        eig_func = cula.culaDeviceSsyev
    else:
        if A.dtype == np.complex128:
            eig_func = cula.culaDeviceZheev
        elif A.dtype == np.float64:
            eig_func = cula.culaDeviceDsyev
        else:
            raise ValueError('unsupported type')
        real_dtype = np.dtype(np.float64)

    D = parray.empty(A.shape[0], real_dtype)

    cula.culaInitialize()
    handle = cublashandle()
    if compute_z:
        jobz = 'V'
    else:
        jobz = 'N'

    eig_func(handle.handle, jobz, uplo, A.shape[0], A.gpudata, A.ld, D.gpudata)
    #cula.culaShutdown()
    if compute_z:
        return D, A.conj().T()
    else:
        return D

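# Illustrative sketch only (the helper name _demo_eig_sym is hypothetical):
# demonstrates the two call forms listed in the eig_sym docstring. Assumes a
# premium CULA toolkit (required by eig_sym) and that parray.to_gpu accepts
# numpy arrays, as used above. The input's GPU data is overwritten by the call.
def _demo_eig_sym():
    M = np.random.rand(6, 6).astype(np.float64)
    G = M + M.T                                      # symmetric test matrix
    # eigenvalues only
    D = eig_sym(parray.to_gpu(G), compute_z=False)
    # eigenvalues and eigenvectors
    D, V = eig_sym(parray.to_gpu(G), compute_z=True, uplo='U')
    return D, V
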
def svd(G, compute_u=True, compute_v=True, econ=False):
    """
    Compute the singular value decomposition of G:
    G = U*(diag(S))*V

    Parameters
    ----------
    G: PitchArray, GPUArray or numpy.ndarray of shape (m,n)
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_u: bool
        whether to return the U matrix
    compute_v: bool
        whether to return the V matrix
    econ: bool
        return economy-size matrices

    Returns
    -------
    U: parray.PitchArray
        matrix as U in G = U*(diag(S))*V;
        if econ, returns the first min(m,n) columns of U
    S: parray.PitchArray
        a row vector containing all singular values in descending order
    V: parray.PitchArray
        matrix as V in G = U*(diag(S))*V;
        if econ, returns the first min(m,n) rows of V

    The outputs always obey the order U, S, V, e.g.
    S = svd(G, compute_u=False, compute_v=False)
    U, S = svd(G, compute_u=True, compute_v=False)
    S, V = svd(G, compute_u=False, compute_v=True)
    U, S, V = svd(G, compute_u=True, compute_v=True)
    """
    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        svd_func = cula.culaDeviceCgesvd
    elif A.dtype == np.float32:
        svd_func = cula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if A.dtype == np.complex128:
                svd_func = cula.culaDeviceZgesvd
            elif A.dtype == np.float64:
                svd_func = cula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_dtype = np.dtype(np.float64)
        else:
            raise TypeError('does not support premium double precision svd')

    if len(A.shape) != 2:
        raise TypeError("svd only works on 2D matrix")

    S = parray.empty(min(A.shape), real_dtype)

    cula.culaInitialize()

    if compute_u:
        if compute_v:
            if econ:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    #cula.culaShutdown()
                    return A, S, V
                else:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    return U, S, A
            else:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    A.shape = (A.shape[1], A.shape[1])
                    return U, S, A
                else:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    A.shape = (A.shape[0], A.shape[0])
                    #cula.culaShutdown()
                    return A, S, V
        else:
            if econ | (A.shape[1] >= A.shape[0]):
                jobu = 'N'
                jobvt = 'O'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                         A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] > A.shape[0]):
                    A.shape = (A.shape[0], A.shape[0])
                #cula.culaShutdown()
                return A, S
            else:
                jobu = 'N'
                jobvt = 'A'
                U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                         A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                #cula.culaShutdown()
                return U, S
    else:
        if compute_v:
            if econ | (A.shape[1] <= A.shape[0]):
                jobu = 'O'
                jobvt = 'N'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                         A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] < A.shape[0]):
                    A.shape = (A.shape[1], A.shape[1])
                #cula.culaShutdown()
                return S, A
            else:
                jobu = 'A'
                jobvt = 'N'
                V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                         A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                #cula.culaShutdown()
                return S, V
        else:
            jobu = 'N'
            jobvt = 'N'
            svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                     A.ld, S.gpudata, 1, 1, 1, 1)
            #cula.culaShutdown()
            return S

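# Illustrative sketch only (the helper name _demo_svd is hypothetical):
# demonstrates the U, S, V output ordering documented in the svd docstring.
# Uses single precision input, which both standard and premium CULA toolkits
# support here; the input's GPU data is reused or destroyed by the call.
def _demo_svd():
    G = np.random.rand(8, 5).astype(np.float32)
    # singular values only
    S = svd(parray.to_gpu(G), compute_u=False, compute_v=False)
    # economy-size full decomposition
    U, S, V = svd(parray.to_gpu(G), compute_u=True, compute_v=True, econ=True)
    return U, S, V
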