def initialize(self, wfs):
    """Attach this object to ``wfs`` and set up its working data.

    Copies the timer, communicators, dtype, band descriptor, diagonalization
    layout and matrix operator from ``wfs`` onto ``self``; decides whether
    the ``Htpsit_nG`` buffer is kept; builds the preconditioner; makes sure
    every k-point has an ``eps_n`` array; and asks the matrix operator to
    allocate its arrays.

    Parameters
    ----------
    wfs:
        Wave-function object.  The attributes read here are ``timer``,
        ``world``, ``kd.comm``, ``band_comm``, ``dtype``, ``bd``,
        ``diagksl``, ``matrixoperator`` and ``kpt_u``; the methods called
        are ``empty`` and ``make_preconditioner``.

    Notes
    -----
    ``self.keep_htpsit`` and ``self.blocksize`` must already be set before
    this method is called; they are only read here.
    """
    self.timer = wfs.timer
    self.world = wfs.world
    self.kpt_comm = wfs.kd.comm
    self.band_comm = wfs.band_comm
    self.dtype = wfs.dtype
    self.bd = band_descriptor = wfs.bd
    self.ksl = wfs.diagksl
    self.nbands = band_descriptor.nbands
    self.mynbands = band_descriptor.mynbands
    self.operator = wfs.matrixoperator

    # The Htpsit_nG buffer is only kept when the local band count equals
    # the total band count and the matrix operator works in a single block.
    owns_all_bands = self.mynbands == self.nbands
    if not (owns_all_bands and self.operator.nblocks == 1):
        self.keep_htpsit = False

    if self.keep_htpsit:
        self.Htpsit_nG = wfs.empty(self.nbands)
        if use_mic:
            # Bind the host buffer to the offload stream and wait for it.
            self.Htpsit_nG_mic = stream.bind(self.Htpsit_nG)
            stream.sync()

    # Preconditioner for the electronic gradients:
    self.preconditioner = wfs.make_preconditioner(self.blocksize)

    # Give every k-point an eigenvalue array if it does not have one yet.
    for kpoint in wfs.kpt_u:
        if kpoint.eps_n is None:
            kpoint.eps_n = np.empty(self.mynbands)

    # Allocate arrays for matrix operator
    self.operator.allocate_arrays()

    self.initialized = True
def initialize_from_lcao_coefficients(self, basis_functions, mynbands):
    """Build grid wave functions for every k-point from LCAO coefficients.

    For each k-point in ``self.kpt_u`` a zero-filled array is created with
    ``self.gd.zeros(self.bd.mynbands, self.dtype)``, its first ``mynbands``
    entries are filled by ``basis_functions.lcao_to_grid`` from the
    k-point's ``C_nM`` coefficients, and ``C_nM`` is then dropped (set to
    ``None``).

    Parameters
    ----------
    basis_functions:
        Object providing ``lcao_to_grid(C_nM, psit_nG, q)``.
    mynbands:
        Number of leading bands of ``psit_nG`` that are filled from the
        coefficients; any remaining bands keep their zero initialization.
    """
    for kpoint in self.kpt_u:
        grid_nG = self.gd.zeros(self.bd.mynbands, self.dtype)
        kpoint.psit_nG = grid_nG
        basis_functions.lcao_to_grid(kpoint.C_nM,
                                     kpoint.psit_nG[:mynbands],
                                     kpoint.q)
        # The coefficients are not needed once they live on the grid.
        kpoint.C_nM = None

        if use_mic:
            # Bind the new host array to the offload stream and wait.
            kpoint.psit_nG_mic = stream.bind(kpoint.psit_nG)
            stream.sync()
def allocate_arrays(self):
    """Allocate the matrix and work arrays used by this operator.

    Sets the following attributes:

    * ``A_qnn`` -- only when the band communicator has more than one rank;
      zero-filled with shape ``(self.Q, mynbands, mynbands)``.
    * ``A_nn`` -- zero-filled, obtained from ``self.bmd.zeros``.
    * ``work1_xG`` -- sized by ``mynbands`` when there is a single band
      group and a single block, otherwise sized by ``self.X``.
    * ``work2_xG`` -- only in the latter (multi-group or multi-block) case.

    When ``use_mic`` is true, ``A_nn`` and the single-block ``work1_xG``
    are additionally bound to the offload stream as ``A_nn_mic`` and
    ``work1_xG_mic``.
    """
    band_group_count = self.bd.comm.size
    local_band_count = self.bd.mynbands
    element_type = self.dtype

    if band_group_count > 1:
        matrix_shape = (self.Q, local_band_count, local_band_count)
        self.A_qnn = np.zeros(matrix_shape, element_type)

    self.A_nn = self.bmd.zeros(dtype=element_type)
    if use_mic:
        self.A_nn_mic = stream.bind(self.A_nn)
        stream.sync()

    if band_group_count != 1 or self.nblocks != 1:
        # Blocked and/or band-parallel case: two work arrays of size X.
        self.work1_xG = self.gd.empty(self.X, self.dtype)
        self.work2_xG = self.gd.empty(self.X, self.dtype)
    else:
        # One band group, one block: a single full-size work array.
        self.work1_xG = self.gd.empty(self.bd.mynbands, self.dtype)
        if use_mic:
            self.work1_xG_mic = stream.bind(self.work1_xG)
            stream.sync()
def empty(self, n=(), dtype=float, global_array=False, pad=False,
          usemic=False):
    """Return new uninitialized 3D array for this domain.

    Parameters
    ----------
    n:
        Extra leading dimension(s); ``n=dim`` adds them in front of the
        three grid axes.
    dtype:
        Element type of the array (default: ``float``).
    global_array:
        If true, allocate an array spanning all domains.
    pad:
        Forwarded unchanged to ``self._new_array``.
    usemic:
        If true, bind the new array to the offload stream, wait for the
        stream, and return the bound offload array instead of the host
        array.

    Returns
    -------
    The array produced by ``self._new_array`` or, with ``usemic=True``,
    the object returned by ``stream.bind`` for that array.
    """
    # The third positional argument (False) presumably selects an
    # uninitialized rather than zero-filled array -- confirm against
    # _new_array.
    host_array = self._new_array(n, dtype, False, global_array, pad)
    if not usemic:
        return host_array

    offload_array = stream.bind(host_array)
    stream.sync()
    return offload_array
def integrate(self, a_xg, b_yg=None,
              global_integral=True, hermitian=False,
              _transposed_result=None):
    """Integrate function(s) over domain.

    a_xg: ndarray
        Function(s) to be integrated.
    b_yg: ndarray
        If present, integrate a_xg.conj() * b_yg.
    global_integral: bool
        If the array(s) are distributed over several domains, then the
        total sum will be returned.  To get the local contribution
        only, use global_integral=False.
    hermitian: bool
        Result is hermitian.
    _transposed_result: ndarray
        Output buffer for the transposed result.  When it is given, no
        global sum is performed here.  Intended for the MatrixOperator
        class only.

    Returns the integral(s) with shape ``xshape + yshape`` (the leading
    axes of the inputs); a zero-dimensional two-array result is returned
    as a plain Python scalar.
    """
    xshape = a_xg.shape[:-3]

    if b_yg is None:
        # Only one array: sum over the three grid axes, scale by dv.
        flat_xg = a_xg.reshape(xshape + (-1,))
        total = flat_xg.sum(axis=-1) * self.dv
        if not global_integral:
            return total
        if total.ndim == 0:
            # Scalars cannot be summed in place; use the returned value.
            return self.comm.sum(total)
        self.comm.sum(total)
        return total

    on_mic = isinstance(a_xg, mic.OffloadArray)

    if on_mic:
        # offload arrays have to be contiguous in any case
        A_xg, B_yg = a_xg, b_yg
    else:
        grid_shape_a = a_xg.shape[-3:]
        grid_shape_b = b_yg.shape[-3:]
        A_xg = np.ascontiguousarray(a_xg.reshape((-1,) + grid_shape_a))
        B_yg = np.ascontiguousarray(b_yg.reshape((-1,) + grid_shape_b))

    if _transposed_result is not None:
        # The caller owns the buffer and the reduction.
        result_yx = _transposed_result
        global_integral = False
    else:
        result_yx = np.zeros((len(B_yg), len(A_xg)), A_xg.dtype)

    same_array = a_xg is b_yg

    if on_mic:
        result_yx_mic = stream.bind(result_yx)
        stream.sync()

        if same_array:
            # dsyrk performs badly in MIC so use dgemm here
            mic_gemm(self.dv, A_xg, A_xg, 0.0, result_yx_mic, 'c')
        elif hermitian:
            # NOTE(review): the host branch below scales by 0.5 * dv while
            # this call passes dv -- confirm that mic_r2k applies the
            # factor of one half itself.
            mic_r2k(self.dv, A_xg, B_yg, 0.0, result_yx_mic)
        else:
            mic_gemm(self.dv, A_xg, B_yg, 0.0, result_yx_mic, 'c')

        # Copy the device result back into result_yx on the host.
        result_yx_mic.update_host()
        stream.sync()
    else:
        if same_array:
            rk(self.dv, A_xg, 0.0, result_yx)
        elif hermitian:
            r2k(0.5 * self.dv, A_xg, B_yg, 0.0, result_yx)
        else:
            gemm(self.dv, A_xg, B_yg, 0.0, result_yx, 'c')

    if global_integral:
        self.comm.sum(result_yx)

    yshape = b_yg.shape[:-3]
    result = result_yx.T.reshape(xshape + yshape)
    if result.ndim == 0:
        return result.item()
    return result
def orthonormalize(self, wfs, kpt, psit_nG=None):
    """Orthonormalize wave functions with respect to the overlap operator.

    The overlap matrix ``S_nn = <psit_n | S | psit_m>`` is built from the
    wave functions and their projections, and is then turned into a
    transformation matrix ``C_nn`` in one of two ways:

    * Default: ``self.ksl.inverse_cholesky(S_nn)`` replaces ``S_nn`` by
      the inverse of its Cholesky factor (``S_nn = C*_nn C_nn``).
    * If ``extra_parameters.get('sic', False)`` is true: symmetric
      Loewdin orthonormalization, built from the diagonalization of
      ``S_nn``.

    The new vectors are then obtained from the matrix product::

        psit_n'(r) = sum_m C_nm psit_m(r)

    Parameters
    ----------
    wfs:
        Wave-function object.  Used for ``wfs.pt.integrate`` (projections),
        ``wfs.matrixoperator`` (matrix elements and the final rotation)
        and ``wfs.setups[a].dO_ii`` (atomic overlap corrections).
    kpt: KPoint object
        k-point whose ``psit_nG``, ``P_ani`` and ``q`` are used.  With
        ``use_mic`` enabled, ``kpt.psit_nG_mic`` is used as well.
    psit_nG: ndarray, optional
        Vectors to orthonormalize; defaults to ``kpt.psit_nG``.  Note that
        the rotated vectors are always requested in ``kpt.psit_nG`` (or
        ``kpt.psit_nG_mic`` on the offload path) via ``out_nG``, also when
        a different array is passed here.

    Returns
    -------
    None.  ``kpt.P_ani`` is passed to both the projection step and the
    final matrix multiplication.
    """
    self.timer.start('Orthonormalize')

    if psit_nG is None:
        psit_nG = kpt.psit_nG
        if use_mic:
            psit_nG_mic = kpt.psit_nG_mic
    elif use_mic:
        # Caller-supplied array: bind it without copying it to the device
        # yet; the copy happens just before the overlap matrix is built.
        psit_nG_mic = stream.bind(psit_nG, update_device=False)
        stream.sync()

    P_ani = kpt.P_ani
    self.timer.start('projections')
    wfs.pt.integrate(psit_nG, P_ani, kpt.q)
    self.timer.stop('projections')

    # Construct the overlap matrix:
    operator = wfs.matrixoperator

    def S(psit_G):
        # The grid part of the overlap operator is the identity.
        return psit_G

    def dS(a, P_ni):
        # Atomic correction for atom ``a``.
        return np.dot(P_ni, wfs.setups[a].dO_ii)

    self.timer.start('calc_s_matrix')
    if use_mic:
        psit_nG_mic.update_device()
        stream.sync()
        S_nn = operator.calculate_matrix_elements(psit_nG_mic, P_ani,
                                                  S, dS)
    else:
        S_nn = operator.calculate_matrix_elements(psit_nG, P_ani, S, dS)
    self.timer.stop('calc_s_matrix')

    orthonormalization_string = repr(self.ksl)
    self.timer.start(orthonormalization_string)
    if extra_parameters.get('sic', False):
        # Symmetric Loewdin orthonormalization: fill in the full matrix
        # from its lower triangle, diagonalize it and combine the result
        # with the inverse square roots of the eigenvalues.
        tri2full(S_nn, UL='L', map=np.conj)
        nrm_n = np.empty(S_nn.shape[0])
        diagonalize(S_nn, nrm_n)
        nrm_nn = np.diag(1.0 / np.sqrt(nrm_n))
        S_nn = np.dot(np.dot(S_nn.T.conj(), nrm_nn), S_nn)
    else:
        self.ksl.inverse_cholesky(S_nn)
    # S_nn now contains the transformation matrix (in the default branch
    # the inverse of the Cholesky factorization).
    # Let's call it something different:
    C_nn = S_nn
    del S_nn
    self.timer.stop(orthonormalization_string)

    self.timer.start('rotate_psi')
    if use_mic:
        operator.matrix_multiply(C_nn, psit_nG_mic, P_ani,
                                 out_nG=kpt.psit_nG_mic)
        # Bring the rotated vectors back to the host array.
        kpt.psit_nG_mic.update_host()
        stream.sync()
    else:
        operator.matrix_multiply(C_nn, psit_nG, P_ani, out_nG=kpt.psit_nG)
    self.timer.stop('rotate_psi')
    self.timer.stop('Orthonormalize')
def orthonormalize(self, wfs, kpt, psit_nG=None):
    """Orthonormalize wave functions with respect to the overlap operator.

    The overlap matrix ``S_nn = <psit_n | S | psit_m>`` is built from the
    wave functions and their projections, and is then turned into a
    transformation matrix ``C_nn`` in one of two ways:

    * Default: ``self.ksl.inverse_cholesky(S_nn)`` replaces ``S_nn`` by
      the inverse of its Cholesky factor (``S_nn = C*_nn C_nn``).
    * If ``extra_parameters.get('sic', False)`` is true: symmetric
      Loewdin orthonormalization, built from the diagonalization of
      ``S_nn``.

    The new vectors are then obtained from the matrix product::

        psit_n'(r) = sum_m C_nm psit_m(r)

    Parameters
    ----------
    wfs:
        Wave-function object.  Used for ``wfs.pt.integrate`` (projections),
        ``wfs.matrixoperator`` (matrix elements and the final rotation)
        and ``wfs.setups[a].dO_ii`` (atomic overlap corrections).
    kpt: KPoint object
        k-point whose ``psit_nG``, ``P_ani`` and ``q`` are used.  With
        ``use_mic`` enabled, ``kpt.psit_nG_mic`` is used as well.
    psit_nG: ndarray, optional
        Vectors to orthonormalize; defaults to ``kpt.psit_nG``.  Note that
        the rotated vectors are always requested in ``kpt.psit_nG`` (or
        ``kpt.psit_nG_mic`` on the offload path) via ``out_nG``, also when
        a different array is passed here.

    Returns
    -------
    None.  ``kpt.P_ani`` is passed to both the projection step and the
    final matrix multiplication.
    """
    self.timer.start('Orthonormalize')

    if psit_nG is None:
        psit_nG = kpt.psit_nG
        if use_mic:
            psit_nG_mic = kpt.psit_nG_mic
    elif use_mic:
        # Caller-supplied array: bind it without copying it to the device
        # yet; the copy happens just before the overlap matrix is built.
        psit_nG_mic = stream.bind(psit_nG, update_device=False)
        stream.sync()

    P_ani = kpt.P_ani
    self.timer.start('projections')
    wfs.pt.integrate(psit_nG, P_ani, kpt.q)
    self.timer.stop('projections')

    # Construct the overlap matrix:
    operator = wfs.matrixoperator

    def S(psit_G):
        # The grid part of the overlap operator is the identity.
        return psit_G

    def dS(a, P_ni):
        # Atomic correction for atom ``a``.
        return np.dot(P_ni, wfs.setups[a].dO_ii)

    self.timer.start('calc_s_matrix')
    if use_mic:
        psit_nG_mic.update_device()
        stream.sync()
        S_nn = operator.calculate_matrix_elements(psit_nG_mic, P_ani,
                                                  S, dS)
    else:
        S_nn = operator.calculate_matrix_elements(psit_nG, P_ani, S, dS)
    self.timer.stop('calc_s_matrix')

    orthonormalization_string = repr(self.ksl)
    self.timer.start(orthonormalization_string)
    if extra_parameters.get('sic', False):
        # Symmetric Loewdin orthonormalization: fill in the full matrix
        # from its lower triangle, diagonalize it and combine the result
        # with the inverse square roots of the eigenvalues.
        tri2full(S_nn, UL='L', map=np.conj)
        nrm_n = np.empty(S_nn.shape[0])
        diagonalize(S_nn, nrm_n)
        nrm_nn = np.diag(1.0 / np.sqrt(nrm_n))
        S_nn = np.dot(np.dot(S_nn.T.conj(), nrm_nn), S_nn)
    else:
        self.ksl.inverse_cholesky(S_nn)
    # S_nn now contains the transformation matrix (in the default branch
    # the inverse of the Cholesky factorization).
    # Let's call it something different:
    C_nn = S_nn
    del S_nn
    self.timer.stop(orthonormalization_string)

    self.timer.start('rotate_psi')
    if use_mic:
        operator.matrix_multiply(C_nn, psit_nG_mic, P_ani,
                                 out_nG=kpt.psit_nG_mic)
        # Bring the rotated vectors back to the host array.
        kpt.psit_nG_mic.update_host()
        stream.sync()
    else:
        operator.matrix_multiply(C_nn, psit_nG, P_ani, out_nG=kpt.psit_nG)
    self.timer.stop('rotate_psi')
    self.timer.stop('Orthonormalize')