def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) matrices on the k-point mesh of the density matrices.

    If ``kpts_band`` is given the call is forwarded to ``get_j_for_bands``.
    Otherwise the J matrices are accumulated chunk by chunk over the blocks
    yielded by ``mydf.ft_loop``, using ``_update_vj_``.

    Args:
        mydf : object providing ``mesh``, ``max_memory``, ``weighted_coulG``
            and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : only forwarded to ``get_j_for_bands``.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points.

    Returns:
        The result of ``_format_jks`` applied to the accumulated
        (n_dm, nkpts, nao, nao) array; only its real part is kept when
        ``gamma_point(kpts)`` is true.
    '''
    # Arbitrary band k-points are handled by the dedicated routine.
    if kpts_band is not None:
        return get_j_for_bands(mydf, dm_kpts, hermi, kpts, kpts_band)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    n_dm, nkpts, nao = dms.shape[:3]
    vj_kpts = numpy.zeros((n_dm, nkpts, nao, nao), dtype=numpy.complex128)

    mesh = mydf.mesh
    zero_kpt = numpy.zeros(3)
    coulG = mydf.weighted_coulG(zero_kpt, False, mesh)

    # Leave 20% headroom on top of what the process already uses.
    mem_in_use = lib.current_memory()[0]
    max_memory = (mydf.max_memory - mem_in_use) * .8
    weight = 1. / len(kpts)

    ft_blocks = mydf.ft_loop(mesh, zero_kpt, kpts, max_memory=max_memory)
    for aoaoks, g0, g1 in ft_blocks:
        _update_vj_(vj_kpts, aoaoks, dms, coulG[g0:g1], weight)
        aoaoks = None  # drop the reference before the next chunk is produced

    if gamma_point(kpts):
        vj_kpts = vj_kpts.real.copy()
    return _format_jks(vj_kpts, dm_kpts, kpts_band, kpts)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None):
    '''Coulomb (J) matrices built in two passes over ``mydf.ft_loop``.

    Pass 1 contracts every density matrix with the real/imaginary blocks
    yielded by ``ft_loop`` at ``kpts`` and multiplies by ``coulG``, giving a
    complex vector per density-matrix set (kept as ``vR``/``vI``).  Pass 2
    contracts that vector with the blocks yielded at ``kpts_band``.

    Args:
        mydf : object providing ``gs``, ``max_memory``, ``stdout``,
            ``verbose``, ``weighted_coulG`` and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : unused here.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points; normalised by
            ``_format_kpts_band``.

    Returns:
        The result of ``_format_jks`` on the (nset, nband, nao, nao) array,
        which is real when the band k-points sum to (numerically) zero and
        complex otherwise.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the stdlib (CPU, wall) clocks.
    # NOTE(review): confirm the logger's timer uses the same pair of clocks.
    t1 = (time.process_time(), time.perf_counter())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    # Flatten each matrix so that the contractions below are plain dot products.
    dmsR = dms.real.reshape(nset,nkpts,nao**2)
    dmsI = dms.imag.reshape(nset,nkpts,nao**2)

    kpt_allow = numpy.zeros(3)
    coulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs)
    ngs = len(coulG)
    vR = numpy.zeros((nset,ngs))
    vI = numpy.zeros((nset,ngs))
    max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8

    # Pass 1
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts, max_memory=max_memory):
        #:rho = numpy.einsum('lkL,lk->L', pqk.conj(), dm)
        for i in range(nset):
            rhoR = numpy.dot(dmsR[i,k], pqkR)
            rhoR+= numpy.dot(dmsI[i,k], pqkI)
            rhoI = numpy.dot(dmsI[i,k], pqkR)
            rhoI-= numpy.dot(dmsR[i,k], pqkI)
            vR[i,p0:p1] += rhoR * coulG[p0:p1]
            vI[i,p0:p1] += rhoI * coulG[p0:p1]
    pqkR = pqkI = coulG = None  # release the large intermediates
    weight = 1./len(kpts)
    vR *= weight
    vI *= weight

    t1 = log.timer_debug1('get_j pass 1 to compute J(G)', *t1)

    kpts_band, single_kpt_band = _format_kpts_band(kpts_band, kpts)
    # Local flag; named so that it cannot shadow a module-level gamma_point().
    is_gamma = abs(kpts_band).sum() < 1e-9
    nband = len(kpts_band)

    # Pass 2
    vjR = numpy.zeros((nset,nband,nao*nao))
    vjI = numpy.zeros((nset,nband,nao*nao))
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts_band,
                            max_memory=max_memory):
        for i in range(nset):
            vjR[i,k] += numpy.dot(pqkR, vR[i,p0:p1])
            vjR[i,k] -= numpy.dot(pqkI, vI[i,p0:p1])
        # The imaginary part is skipped entirely when the result is kept real.
        if not is_gamma:
            for i in range(nset):
                vjI[i,k] += numpy.dot(pqkI, vR[i,p0:p1])
                vjI[i,k] += numpy.dot(pqkR, vI[i,p0:p1])
    pqkR = pqkI = None

    if is_gamma:
        vj_kpts = vjR
    else:
        vj_kpts = vjR + vjI*1j
    vj_kpts = vj_kpts.reshape(nset,nband,nao,nao)
    t1 = log.timer_debug1('get_j pass 2', *t1)

    return _format_jks(vj_kpts, dm_kpts, kpts_band, kpts, single_kpt_band)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None):
    '''Coulomb (J) matrices on the k-point mesh of the density matrices.

    If ``kpts_band`` is given the call is forwarded to ``get_j_for_bands``.
    Otherwise, for every chunk yielded by ``mydf.ft_loop`` the density
    matrices are contracted with the chunk, multiplied by ``coulG`` and the
    k-point weight, and contracted back with the same chunk.

    Args:
        mydf : object providing ``mesh``, ``max_memory``, ``weighted_coulG``
            and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : only forwarded to ``get_j_for_bands``.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points.

    Returns:
        The result of ``_format_jks`` applied to the accumulated
        (nset, nkpts, nao, nao) array; only its real part is kept when
        ``gamma_point(kpts)`` is true.
    '''
    if kpts_band is not None:
        return get_j_for_bands(mydf, dm_kpts, hermi, kpts, kpts_band)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    vj_kpts = numpy.zeros((nset,nkpts,nao,nao), dtype=numpy.complex128)

    zero_kpt = numpy.zeros(3)
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(zero_kpt, False, mesh)
    mem_in_use = lib.current_memory()[0]
    max_memory = (mydf.max_memory - mem_in_use) * .8
    weight = 1./len(kpts)
    dms_conj = dms.conj()
    set_ids = range(nset)

    for aoaoks, g0, g1 in mydf.ft_loop(mesh, zero_kpt, kpts, max_memory=max_memory):
        coulG_blk = coulG[g0:g1]
        pot = [0] * nset
        #:rho = numpy.einsum('lkL,lk->L', pqk.conj(), dm)
        for k, pair in enumerate(aoaoks):
            pair3d = pair.reshape(-1,nao,nao)
            for i in set_ids:
                rho = numpy.einsum('ij,Lij->L', dms_conj[i,k], pair3d).conj()
                pot[i] += rho * coulG_blk
        for i in set_ids:
            pot[i] *= weight
        for k, pair in enumerate(aoaoks):
            pair3d = pair.reshape(-1,nao,nao)
            for i in set_ids:
                vj_kpts[i,k] += numpy.einsum('L,Lij->ij', pot[i], pair3d)
    # Drop references to the last chunk.
    pair = pair3d = aoaoks = None

    if gamma_point(kpts):
        vj_kpts = vj_kpts.real.copy()
    return _format_jks(vj_kpts, dm_kpts, kpts_band, kpts)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) AO matrices evaluated with forward/backward FFTs.

    The density is accumulated on the grid, transformed with ``tools.fft``,
    multiplied by ``coulG`` and transformed back; the real part of the result
    is then integrated against the AOs at the band k-points with
    ``ni.eval_mat``.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``low_dim_ft_type``,
            ``_numint``, ``grids`` and ``aoR_loop``.
        dm_kpts : (nkpts, nao, nao) ndarray or a list of such arrays.
        hermi : forwarded to ``ni._gen_rho_evaluator``.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        kpts_band : (3,) ndarray or (*,3) ndarray
            Band k-points at which to evaluate the matrix; ``kpts`` is used
            when omitted (via ``_format_kpts_band``).

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    low_dim_ft_type = mydf.low_dim_ft_type
    ni = mydf._numint

    # The density evaluator is generated from the input before normalisation;
    # nset and nao are then re-read from the normalised array.
    make_rho, nset, nao = ni._gen_rho_evaluator(cell, dm_kpts, hermi)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    nset, nkpts, nao = _format_dms(dm_kpts, kpts).shape[:3]

    coulG = tools.get_coulG(cell, mesh=mesh, low_dim_ft_type=low_dim_ft_type)
    ngrids = len(coulG)

    rhoR = np.zeros((nset, ngrids))
    for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts):
        ao_ks = blk[0]
        mask = blk[2]
        for idm in range(nset):
            rhoR[idm, g0:g1] += make_rho(idm, ao_ks, mask, 'LDA')
        ao_ks = None

    # The potential overwrites the density buffer row by row.
    vR = rhoR
    for idm in range(nset):
        vG = coulG * tools.fft(rhoR[idm], mesh)
        vR[idm] = tools.ifft(vG, mesh).real

    input_band = kpts_band
    kpts_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    vR *= cell.vol / ngrids

    out_shape = (nset, nband, nao, nao)
    if gamma_point(kpts_band):
        vj_kpts = np.zeros(out_shape)
    else:
        vj_kpts = np.zeros(out_shape, dtype=np.complex128)

    for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts_band):
        ao_ks = blk[0]
        mask = blk[2]
        for idm in range(nset):
            vj_kpts[idm] += ni.eval_mat(cell, ao_ks, 1., None,
                                        vR[idm, g0:g1], mask, 'LDA')

    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None,
               exxdiv=None):
    '''Exchange (K) AO matrices at sampled k-points.

    For every pair (band k-point k1, mesh k-point k2) the kernel returned by
    ``get_vkR`` is contracted with ``ao_k2 . dm[k2]`` and projected onto the
    AOs at k1.

    Args:
        mydf : object providing ``cell``, ``gs`` and ``aoR_loop``.
        dm_kpts : (nkpts, nao, nao) ndarray, or a list of them
            Density matrix at each k-point
        hermi : unused here.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        kpts_band : (3,) ndarray or (*,3) ndarray
            Band k-points at which to evaluate the matrix.
        exxdiv : forwarded to ``get_vkR``.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) K array.
    '''
    cell = mydf.cell
    gs = mydf.gs
    coords = cell.gen_uniform_grids(gs)
    ngs = coords.shape[0]

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngs)

    kpts_band, single_kpt_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)

    # The input dtype is kept only when both k-point sets are at Gamma.
    all_gamma = gamma_point(kpts_band) and gamma_point(kpts)
    out_dtype = dms.dtype if all_gamma else np.complex128
    vk_kpts = np.zeros((nset, nband, nao, nao), dtype=out_dtype)

    for k2, ao_k2 in mydf.aoR_loop(gs, kpts):
        kpt2 = kpts[k2]
        ao_dm = [lib.dot(ao_k2, dms[idm, k2]) for idm in range(nset)]
        for k1, ao_k1 in mydf.aoR_loop(gs, kpts_band):
            kpt1 = kpts_band[k1]
            kernel = get_vkR(mydf, cell, ao_k1, ao_k2, kpt1, kpt2,
                             coords, gs, exxdiv)
            bra = ao_k1.T.conj()
            for idm in range(nset):
                half = np.einsum('Rqs,Rs->Rq', kernel, ao_dm[idm])
                vk_kpts[idm, k1] += weight * lib.dot(bra, half)
        # Release the per-k2 intermediates.
        kernel = ao_dm = half = None

    return _format_jks(vk_kpts, dm_kpts, kpts_band, kpts, single_kpt_band)
def get_j_for_bands(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) matrices at band k-points, built in two passes.

    Pass 1 contracts the density matrices with the chunks yielded by
    ``mydf.ft_loop`` at ``kpts`` and multiplies by ``coulG``; pass 2 contracts
    the result with the chunks yielded at ``kpts_band``.

    Args:
        mydf : object providing ``mesh``, ``max_memory``, ``stdout``,
            ``verbose``, ``weighted_coulG`` and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : unused here.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : band k-points; normalised by ``_format_kpts_band``.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array;
        only its real part is kept when ``gamma_point(kpts_band)`` is true.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the stdlib (CPU, wall) clocks.
    # NOTE(review): confirm the logger's timer uses the same pair of clocks.
    t1 = (time.process_time(), time.perf_counter())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    kpt_allow = numpy.zeros(3)
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(kpt_allow, False, mesh)
    ngrids = len(coulG)
    vG = numpy.zeros((nset, ngrids), dtype=numpy.complex128)
    max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8

    # Pass 1
    dmsC = dms.conj()
    for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts, max_memory=max_memory):
        #:rho = numpy.einsum('lkL,lk->L', pqk.conj(), dm)
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1, nao, nao)
            for i in range(nset):
                rho = numpy.einsum('ij,Lij->L', dmsC[i, k], aoao).conj()
                vG[i, p0:p1] += rho * coulG[p0:p1]
    aoao = aoaoks = p0 = p1 = None  # release the last chunk
    weight = 1. / len(kpts)
    vG *= weight
    t1 = log.timer_debug1('get_j pass 1 to compute J(G)', *t1)

    # Pass 2
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    vj_kpts = numpy.zeros((nset, nband, nao, nao), dtype=numpy.complex128)
    for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts_band, max_memory=max_memory):
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1, nao, nao)
            for i in range(nset):
                vj_kpts[i, k] += numpy.einsum('L,Lij->ij', vG[i, p0:p1], aoao)
    aoao = aoaoks = p0 = p1 = None

    if gamma_point(kpts_band):
        vj_kpts = vj_kpts.real.copy()
    t1 = log.timer_debug1('get_j pass 2', *t1)
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_j_kpts(self, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None):
    '''Coulomb (J) matrices with a compact/diffused basis split.

    C ~ compact basis, D ~ diffused basis

    Compute J matrix with coulG_LR:
        (CC|CC) (CC|CD) (CC|DC) (CD|CC) (CD|CD) (CD|DC) (DC|CC) (DC|CD) (DC|DC)

    Compute J matrix with full coulG:
        (CC|DD) (CD|DD) (DC|DD) (DD|CC) (DD|CD) (DD|DC) (DD|DD)

    If ``kpts_band`` is given the call is forwarded to
    ``self.get_j_for_bands``.

    Args:
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : only forwarded to ``get_j_for_bands``.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points.

    Returns:
        The output of ``_format_jks`` for the (n_dm, nkpts, nao, nao) array;
        only its real part is kept when ``gamma_point(kpts)`` is true.
    '''
    if kpts_band is not None:
        return self.get_j_for_bands(dm_kpts, hermi, kpts, kpts_band)

    cell = self.cell
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    n_dm, nkpts, nao = dms.shape[:3]

    # AO index at which the diffused shells start.
    n_diffused = cell._nbas_each_set[2]
    nao_compact = cell.ao_loc[cell.nbas-n_diffused]

    vj_kpts = np.zeros((n_dm,nkpts,nao,nao), dtype=np.complex128)
    kpt_allow = np.zeros(3)
    mesh = self.mesh
    coulG = self.weighted_coulG(kpt_allow, False, mesh)
    coulG_LR = self.weighted_coulG_LR(kpt_allow, False, mesh)
    coulG_SR = coulG - coulG_LR
    max_memory = (self.max_memory - lib.current_memory()[0]) * .8
    weight = 1./len(kpts)
    for aoaoks, p0, p1 in self.ft_loop(mesh, kpt_allow, kpts, max_memory=max_memory):
        if nao_compact < nao:
            aoaoks = [aoao.reshape(-1,nao,nao) for aoao in aoaoks]
            # Full kernel for everything ...
            aft_jk._update_vj_(vj_kpts, aoaoks, dms, coulG[p0:p1], weight)
            # ... then, with the DD block zeroed in place, subtract the
            # short-range part so the remaining pairs see coulG_LR only.
            for aoao in aoaoks:
                aoao[:,nao_compact:,nao_compact:] = 0
            aft_jk._update_vj_(vj_kpts, aoaoks, dms, coulG_SR[p0:p1], -weight)
        else:
            aft_jk._update_vj_(vj_kpts, aoaoks, dms, coulG_LR[p0:p1], weight)
    aoao = aoaoks = p0 = p1 = None

    # G=0 contribution, associated to 2e integrals in real-space
    if cell.dimension >= 2:
        ovlp = np.asarray(cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts))
        if nao_compact < nao:
            ovlp[:,nao_compact:,nao_compact:] = 0
        kws = cell.get_Gv_weights(mesh)[2]
        G0_weight = kws[0] if isinstance(kws, np.ndarray) else kws
        # Use the normalised 4-index ``dms``: the raw ``dm_kpts`` is only
        # 3-index for a single density-matrix set and would not match the
        # 'nkqp' subscript.
        vj_G0 = lib.einsum('kpq,nkqp,lrs->nlrs', ovlp, dms, ovlp)
        vj_kpts -= np.pi/self.omega**2 * weight * G0_weight * vj_G0

    if gamma_point(kpts):
        vj_kpts = vj_kpts.real.copy()
    return _format_jks(vj_kpts, dm_kpts, kpts_band, kpts)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) AO matrices evaluated with forward/backward FFTs.

    The density from all k-points is accumulated on the grid, averaged over
    ``nkpts``, transformed with ``tools.fft``, multiplied by ``coulG`` and
    transformed back.  The real part of the result is integrated against the
    AOs at every band k-point.

    Args:
        mydf : object providing ``cell``, ``gs`` and ``aoR_loop``.
        dm_kpts : (nkpts, nao, nao) ndarray or a list of such arrays.
        hermi : unused here.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        kpts_band : (3,) ndarray or (*,3) ndarray
            Band k-points at which to evaluate the matrix.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array.
    '''
    cell = mydf.cell
    gs = mydf.gs

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    coulG = tools.get_coulG(cell, gs=gs)
    ngs = len(coulG)

    rhoR = np.zeros((nset, ngs))
    for k, aoR in mydf.aoR_loop(gs, kpts):
        for idm in range(nset):
            rhoR[idm] += numint.eval_rho(cell, aoR, dms[idm, k])

    # The potential overwrites the density buffer row by row.
    vR = rhoR
    for idm in range(nset):
        rhoR[idm] *= 1. / nkpts
        vG = coulG * tools.fft(rhoR[idm], gs)
        vR[idm] = tools.ifft(vG, gs).real

    kpts_band, single_kpt_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    weight = cell.vol / ngs

    out_shape = (nset, nband, nao, nao)
    if gamma_point(kpts_band):
        vj_kpts = np.empty(out_shape)
    else:
        vj_kpts = np.empty(out_shape, dtype=np.complex128)

    for k, aoR in mydf.aoR_loop(gs, kpts_band):
        bra = aoR.T.conj()
        for idm in range(nset):
            vj_kpts[idm, k] = weight * lib.dot(bra * vR[idm], aoR)

    return _format_jks(vj_kpts, dm_kpts, kpts_band, kpts, single_kpt_band)
def get_j_for_bands(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) matrices at band k-points, built in two passes.

    Pass 1 accumulates, for all density-matrix sets at once, the contraction
    of the density matrices with the chunks yielded by ``mydf.ft_loop`` at
    ``kpts``.  After scaling by ``coulG`` and the k-point weight, pass 2
    contracts the result with the chunks yielded at ``kpts_band``.

    Args:
        mydf : object providing ``mesh``, ``max_memory``, ``stdout``,
            ``verbose``, ``weighted_coulG`` and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : unused here.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : band k-points; normalised by ``_format_kpts_band``.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array;
        only its real part is kept when ``gamma_point(kpts_band)`` is true.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    cpu_wall = (logger.process_clock(), logger.perf_counter())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    zero_kpt = numpy.zeros(3)
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(zero_kpt, False, mesh)
    ngrids = len(coulG)
    rhoG = numpy.zeros((nset, ngrids), dtype=numpy.complex128)
    mem_in_use = lib.current_memory()[0]
    max_memory = (mydf.max_memory - mem_in_use) * .8

    # Pass 1
    for chunk, g0, g1 in mydf.ft_loop(mesh, zero_kpt, kpts, max_memory=max_memory):
        for k, pair in enumerate(chunk):
            pair3d = pair.reshape(-1, nao, nao)
            dm_conj = dms[:, k].conj()
            rhoG[:, g0:g1] += numpy.einsum('nij,Lij->nL', dm_conj, pair3d).conj()
    pair = pair3d = chunk = None  # release the last chunk

    weight = 1. / len(kpts)
    vG = rhoG * coulG * weight
    cpu_wall = log.timer_debug1('get_j pass 1 to compute J(G)', *cpu_wall)

    # Pass 2
    input_band = kpts_band
    kpts_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    vj_kpts = numpy.zeros((nset, nband, nao, nao), dtype=numpy.complex128)
    for chunk, g0, g1 in mydf.ft_loop(mesh, zero_kpt, kpts_band, max_memory=max_memory):
        for k, pair in enumerate(chunk):
            pair3d = pair.reshape(-1, nao, nao)
            vj_kpts[:, k] += numpy.einsum('nL,Lij->nij', vG[:, g0:g1], pair3d)
    pair = pair3d = chunk = None

    if gamma_point(kpts_band):
        vj_kpts = vj_kpts.real.copy()
    cpu_wall = log.timer_debug1('get_j pass 2', *cpu_wall)
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None):
    '''Coulomb (J) AO matrices via FFT, with the grid work spread by
    ``mydf.mpi_aoR_loop``.

    Each caller accumulates density on the grid chunks it receives;
    ``mpi.allreduce`` combines them before the FFT / ``coulG`` / inverse FFT
    step.  The matrix elements are again accumulated per chunk and combined
    with ``mpi.reduce``.

    Args:
        mydf : passed through ``_sync_mydf``; must provide ``cell``, ``mesh``,
            ``grids`` and ``mpi_aoR_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : unused here.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points.

    Returns:
        The output of ``_format_jks`` on the reduced array.
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    mesh = mydf.mesh

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    coulG = tools.get_coulG(cell, mesh=mesh)
    ngrids = len(coulG)

    # Local buffer: first holds this caller's density, later the potential.
    vR = numpy.zeros((nset,ngrids))
    for blk, g0, g1 in mydf.mpi_aoR_loop(mydf.grids, kpts):
        for k, ao in enumerate(blk[0]):
            for idm in range(nset):
                vR[idm,g0:g1] += numint.eval_rho(cell, ao, dms[idm,k])
        ao = None

    rhoR = mpi.allreduce(vR)
    for idm in range(nset):
        rhoR[idm] *= 1./nkpts
        vG = coulG * tools.fft(rhoR[idm], mesh)
        vR[idm] = tools.ifft(vG, mesh).real

    input_band = kpts_band
    kpts_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    vR *= cell.vol / ngrids

    out_shape = (nset,nband,nao,nao)
    if gamma_point(kpts_band):
        vj_kpts = numpy.zeros(out_shape)
    else:
        vj_kpts = numpy.zeros(out_shape, dtype=numpy.complex128)

    for blk, g0, g1 in mydf.mpi_aoR_loop(mydf.grids, kpts_band):
        for k, ao in enumerate(blk[0]):
            bra = ao.T.conj()
            for idm in range(nset):
                vj_kpts[idm,k] += lib.dot(bra*vR[idm,g0:g1], ao)

    vj_kpts = mpi.reduce(vj_kpts)
    if gamma_point(kpts_band):
        vj_kpts = vj_kpts.real
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_j_for_bands(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None):
    '''Coulomb (J) matrices at band k-points, built in two passes.

    Pass 1 contracts the density matrices with the chunks yielded by
    ``mydf.ft_loop`` at ``kpts`` and multiplies by ``coulG``; pass 2 contracts
    the result with the chunks yielded at ``kpts_band``.

    Args:
        mydf : object providing ``mesh``, ``max_memory``, ``stdout``,
            ``verbose``, ``weighted_coulG`` and ``ft_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        hermi : unused here.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : band k-points; normalised by ``_format_kpts_band``.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array;
        only its real part is kept when ``gamma_point(kpts_band)`` is true.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the stdlib (CPU, wall) clocks.
    # NOTE(review): confirm the logger's timer uses the same pair of clocks.
    t1 = (time.process_time(), time.perf_counter())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    kpt_allow = numpy.zeros(3)
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(kpt_allow, False, mesh)
    ngrids = len(coulG)
    vG = numpy.zeros((nset,ngrids), dtype=numpy.complex128)
    max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8

    # Pass 1
    dmsC = dms.conj()
    for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts, max_memory=max_memory):
        #:rho = numpy.einsum('lkL,lk->L', pqk.conj(), dm)
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1,nao,nao)
            for i in range(nset):
                rho = numpy.einsum('ij,Lij->L', dmsC[i,k], aoao).conj()
                vG[i,p0:p1] += rho * coulG[p0:p1]
    aoao = aoaoks = p0 = p1 = None  # release the last chunk
    weight = 1./len(kpts)
    vG *= weight
    t1 = log.timer_debug1('get_j pass 1 to compute J(G)', *t1)

    # Pass 2
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    vj_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128)
    for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt_allow, kpts_band, max_memory=max_memory):
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1,nao,nao)
            for i in range(nset):
                vj_kpts[i,k] += numpy.einsum('L,Lij->ij', vG[i,p0:p1], aoao)
    aoao = aoaoks = p0 = p1 = None

    if gamma_point(kpts_band):
        vj_kpts = vj_kpts.real.copy()
    t1 = log.timer_debug1('get_j pass 2', *t1)
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None,
               exxdiv=None):
    '''Get the exchange (K) AO matrices at sampled k-points.

    Args:
        dm_kpts : (nkpts, nao, nao) ndarray, or a list of them
            Density matrix at each k-point.  If the object carries
            ``mo_coeff``/``mo_occ`` attributes and there is a single set,
            the occupied orbitals are used instead of the density matrix.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        hermi : int
            Whether K matrix is hermitian (unused here)

            | 0 : not hermitian and not symmetric
            | 1 : hermitian

        kpts_band : (3,) ndarray or (*,3) ndarray
            A list of arbitrary "band" k-points at which to evaluate the matrix.
        exxdiv : stored on ``mydf.exxdiv``; selects the ``get_coulG`` flag and,
            for 'ewald', the final G=0 correction.

    Returns:
        vk : the output of ``_format_jks`` for the (nset, nband, nao, nao)
        K array.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128)

    # AO values are stored transposed, i.e. (nao, ngrids), C-contiguous.
    coords = mydf.grids.coords
    ao2_kpts = [np.asarray(ao.T, order='C')
                for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [np.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)]
    if mo_coeff is not None and nset == 1:
        # Replace the AOs at k2 by occupied orbitals scaled by sqrt(occ).
        mo_coeff = [mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
                    for k, occ in enumerate(mo_occ)]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # NOTE(review): mem_now is subtracted a second time here; kept as is
    # (it only makes the block size more conservative) -- confirm intent.
    blksize = int(
        min(nao, max(1, (max_memory - mem_now) * 1e6 / 16 / 4 / ngrids / nao)))
    lib.logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s blksize %d',
                      max_memory, blksize)
    vR_dm = np.empty((nset, nao, ngrids), dtype=vk_kpts.dtype)

    # time.clock() was removed in Python 3.8; process_time()/perf_counter()
    # are the stdlib (CPU, wall) clocks.
    # NOTE(review): confirm the logger's timer uses the same pair of clocks.
    t1 = (time.process_time(), time.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            mydf.exxdiv = exxdiv
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, True, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process the AOs at k1 in blocks to bound the pair-density size.
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T)
        t1 = lib.logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1)

    # Function _ewald_exxdiv_for_G0 to add back in the G=0 component to vk_kpts
    # Note in the _ewald_exxdiv_for_G0 implementation, the G=0 treatments are
    # different for 1D/2D and 3D systems.  The special treatments for 1D and 2D
    # can only be used with AFTDF/GDF/MDF method.  In the FFTDF method, 1D, 2D
    # and 3D should use the ewald probe charge correction.
    if exxdiv == 'ewald':
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None):
    '''Coulomb (J) AO matrices evaluated with forward/backward FFTs.

    Two density paths exist.  When ``hermi == 1`` or ``kpts`` is the Gamma
    point, the density comes from ``make_rho`` and only the real part of the
    potential is kept.  Otherwise the density is assembled explicitly from the
    AOs and density matrices, averaged over ``nkpts``, and the potential stays
    complex.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``_numint``, ``grids`` and
            ``aoR_loop``.
        dm_kpts : (nkpts, nao, nao) ndarray or a list of such arrays.
        hermi : selects the density path (see above); also forwarded to
            ``ni._gen_rho_evaluator``.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        kpts_band : (3,) ndarray or (*,3) ndarray
            Band k-points at which to evaluate the matrix.

    Returns:
        The output of ``_format_jks`` for the (nset, nband, nao, nao) array.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    ni = mydf._numint

    make_rho, nset, nao = ni._gen_rho_evaluator(cell, dm_kpts, hermi)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    coulG = tools.get_coulG(cell, mesh=mesh)
    ngrids = len(coulG)

    if hermi == 1 or gamma_point(kpts):
        rhoR = np.zeros((nset, ngrids))
        for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts):
            ao_ks = blk[0]
            mask = blk[2]
            for idm in range(nset):
                rhoR[idm, g0:g1] += make_rho(idm, ao_ks, mask, 'LDA')
            ao_ks = None

        # The potential overwrites the density buffer row by row.
        vR = rhoR
        for idm in range(nset):
            vG = coulG * tools.fft(rhoR[idm], mesh)
            vR[idm] = tools.ifft(vG, mesh).real
    else:
        # vR may be complex if the underlying density is complex
        rhoR = np.zeros((nset, ngrids), dtype=np.complex128)
        for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts):
            ao_ks = blk[0]
            mask = blk[2]
            for idm in range(nset):
                for k, ao in enumerate(ao_ks):
                    ao_dm = lib.dot(ao, dms[idm, k])
                    rhoR[idm, g0:g1] += np.einsum('xi,xi->x', ao_dm, ao.conj())
        rhoR *= 1. / nkpts

        vR = rhoR
        for idm in range(nset):
            vG = coulG * tools.fft(rhoR[idm], mesh)
            vR[idm] = tools.ifft(vG, mesh)

    input_band = kpts_band
    kpts_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    vR *= cell.vol / ngrids

    out_shape = (nset, nband, nao, nao)
    if gamma_point(kpts_band):
        vj_kpts = np.zeros(out_shape)
    else:
        vj_kpts = np.zeros(out_shape, dtype=np.complex128)

    for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts_band):
        ao_ks = blk[0]
        mask = blk[2]
        for idm in range(nset):
            # ni.eval_mat can handle real vR only, so the contraction is
            # written out explicitly.
            pot = vR[idm, g0:g1]
            for k, ao in enumerate(ao_ks):
                weighted_ao = np.einsum('xi,x->xi', ao, pot)
                vj_kpts[idm, k] += lib.dot(ao.conj().T, weighted_ao)

    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def uks_j_xc(mydf, dm_kpts, xc_code, hermi=1, kpts=numpy.zeros((1, 3)),
             kpts_band=None, with_j=WITH_J, j_in_xc=J_IN_XC):
    '''Spin-unrestricted XC (and optionally Coulomb) matrices over the
    grid tasks in ``mydf.tasks``.

    Args:
        mydf : object providing ``cell``, ``_numint``, ``tasks``,
            ``aoR_loop``, ``low_dim_ft_type``, ``stdout`` and ``verbose``.
        dm_kpts : density matrices; exactly two sets (alpha, beta) are
            required (asserted below).
        xc_code : functional description passed to ``ni._xc_type`` and
            ``ni.eval_xc``.
        hermi : forwarded to ``_eval_rhoG``.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : used for the output dtype decision and ``_format_jks``.
        with_j : when true, the Coulomb matrix ``vj`` is accumulated too.
        j_in_xc : when true, the Coulomb potential is added to the XC
            potential and the Coulomb energy to ``excsum``.

    Returns:
        (nelec, excsum, veff, vj) : ``nelec`` is a length-2 array of grid
        integrals of the two spin densities, ``excsum`` the accumulated
        energy, ``veff`` and ``vj`` the outputs of ``_format_jks``.
    '''
    log = lib.logger.Logger(mydf.stdout, mydf.verbose)
    cell = mydf.cell
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    # Only the shape was needed; drop the normalised copy.
    dms = None
    #TODO: Handle multiple sets of KUKS density matrices (2,nset,nkpts,nao,nao)
    assert (nset == 2)  # alpha and beta density matrices in KUKS

    ni = mydf._numint
    xctype = ni._xc_type(xc_code)

    # Each branch computes rhoG and defines the two accumulators used in the
    # task loop below: add_j_ (local potential only) and add_xc_ (potential
    # including the derivative components required by the functional type).
    if xctype == 'LDA':
        deriv = 0
        rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv=0)

        def add_j_(v, ao_l, ao_h, idx_l, idx_h, vR):
            # Adds <ao_l| vR |ao_h> into the (idx_l, idx_h) sub-block of v for
            # both spin channels.
            for k in range(nkpts):
                aow = numpy.einsum('pi,p->pi', ao_l[k], vR[0])
                v[0, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k])
                aow = numpy.einsum('pi,p->pi', ao_l[k], vR[1])
                v[1, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k])

        def add_xc_(v, ao_l, ao_h, idx_l, idx_h, wv):
            # For LDA the XC potential has a single component per spin.
            add_j_(v, ao_l, ao_h, idx_l, idx_h, wv[:, 0])

    elif xctype == 'GGA':
        deriv = 1
        if RHOG_HIGH_DERIV:
            rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv)
        else:
            # Build the three gradient components from the density itself by
            # multiplying with 1j*Gv.
            Gv = cell.Gv
            ngrids = Gv.shape[0]
            rhoG = numpy.empty((2, 4, ngrids), dtype=numpy.complex128)
            rhoG[:, :1] = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv=0)
            rhoG[:, 1:] = numpy.einsum('np,px->nxp', 1j * rhoG[:, 0], Gv)

        def add_j_(v, ao_l, ao_h, idx_l, idx_h, vR):
            # Same as the LDA version, using component 0 of the AO arrays.
            for k in range(nkpts):
                aow = numpy.einsum('pi,p->pi', ao_l[k][0], vR[0])
                v[0, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('pi,p->pi', ao_l[k][0], vR[1])
                v[1, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k][0])

        def add_xc_(v, ao_l, ao_h, idx_l, idx_h, wv):
            wva, wvb = wv
            for k in range(nkpts):
                # alpha channel
                aow = numpy.einsum('npi,np->pi', ao_l[k][:4], wva)
                v1 = lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('npi,np->pi', ao_h[k][1:4], wva[1:4])
                v1 += lib.dot(ao_l[k][0].conj().T, aow)
                v[0, k, idx_l[:, None], idx_h] += v1
                # beta channel
                aow = numpy.einsum('npi,np->pi', ao_l[k][:4], wvb)
                v1 = lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('npi,np->pi', ao_h[k][1:4], wvb[1:4])
                v1 += lib.dot(ao_l[k][0].conj().T, aow)
                v[1, k, idx_l[:, None], idx_h] += v1

    else:  # MGGA
        deriv = 2
        #TODO: RHOG_HIGH_DERIV:
        rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv)

        def add_j_(v, ao_l, ao_h, idx_l, idx_h, vR):
            # Same as the GGA version.
            for k in range(nkpts):
                aow = numpy.einsum('pi,p->pi', ao_l[k][0], vR[0])
                v[0, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('pi,p->pi', ao_l[k][0], vR[1])
                v[1, k, idx_l[:, None], idx_h] += lib.dot(aow.conj().T, ao_h[k][0])

        def add_xc_(v, ao_l, ao_h, idx_l, idx_h, wv):
            wva, wvb = wv
            for k in range(nkpts):
                # alpha channel: GGA-like terms, then the wva[4] term applied
                # to each of the three AO-derivative pairs.
                aow = numpy.einsum('npi,np->pi', ao_l[k][:4], wva[:4])
                v1 = lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('npi,np->pi', ao_h[k][1:4], wva[1:4])
                v1 += lib.dot(ao_l[k][0].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][1], wva[4], out=aow)
                v1 += lib.dot(ao_l[k][1].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][2], wva[4], out=aow)
                v1 += lib.dot(ao_l[k][2].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][3], wva[4], out=aow)
                v1 += lib.dot(ao_l[k][3].conj().T, aow)
                v[0, k, idx_l[:, None], idx_h] += v1
                # beta channel
                aow = numpy.einsum('npi,np->pi', ao_l[k][:4], wvb[:4])
                v1 = lib.dot(aow.conj().T, ao_h[k][0])
                aow = numpy.einsum('npi,np->pi', ao_h[k][1:4], wvb[1:4])
                v1 += lib.dot(ao_l[k][0].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][1], wvb[4], out=aow)
                v1 += lib.dot(ao_l[k][1].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][2], wvb[4], out=aow)
                v1 += lib.dot(ao_l[k][2].conj().T, aow)
                aow = numpy.einsum('pi,p->pi', ao_h[k][3], wvb[4], out=aow)
                v1 += lib.dot(ao_l[k][3].conj().T, aow)
                v[1, k, idx_l[:, None], idx_h] += v1

    # Coulomb potential of each spin density on the full mesh.
    mesh = cell.mesh
    coulG = tools.get_coulG(cell, mesh=mesh, low_dim_ft_type=mydf.low_dim_ft_type)
    ngrids = coulG.size
    vG = numpy.einsum('ng,g->ng', rhoG[:, 0].reshape(-1, ngrids), coulG)
    vG = vG.reshape(2, *mesh)

    weight = cell.vol / ngrids
    # *(1./weight) because rhoR is scaled by weight in _eval_rhoG.  When
    # computing rhoR with IFFT, the weight factor is not needed.
    rhoR = tools.ifft(rhoG.reshape(-1, ngrids), mesh) * (1. / weight)
    rhoR = rhoR.real.reshape(2, -1, ngrids)
    nelec = numpy.zeros(2)
    excsum = 0

    exc, vxc = ni.eval_xc(xc_code, rhoR, 1, deriv=1)[:2]
    # Assemble the per-spin potential components (wva: alpha, wvb: beta).
    if xctype == 'LDA':
        vrho = vxc[0]
        wva = vrho[:, 0].reshape(1, ngrids)
        wvb = vrho[:, 1].reshape(1, ngrids)
    elif xctype == 'GGA':
        vrho, vsigma = vxc[:2]
        wva = numpy.empty((4, ngrids))
        wvb = numpy.empty((4, ngrids))
        wva[0] = vrho[:, 0]
        wva[1:4] = rhoR[0, 1:4] * (vsigma[:, 0] * 2)  # sigma_uu
        wva[1:4] += rhoR[1, 1:4] * vsigma[:, 1]  # sigma_ud
        wvb[0] = vrho[:, 1]
        wvb[1:4] = rhoR[1, 1:4] * (vsigma[:, 2] * 2)  # sigma_dd
        wvb[1:4] += rhoR[0, 1:4] * vsigma[:, 1]  # sigma_ud
    else:
        vrho, vsigma, vlapl, vtau = vxc
        wva = numpy.empty((5, ngrids))
        wvb = numpy.empty((5, ngrids))
        wva[0] = vrho[:, 0]
        wva[1:4] = rhoR[0, 1:4] * (vsigma[:, 0] * 2)  # sigma_uu
        wva[1:4] += rhoR[1, 1:4] * vsigma[:, 1]  # sigma_ud
        wvb[0] = vrho[:, 1]
        wvb[1:4] = rhoR[1, 1:4] * (vsigma[:, 2] * 2)  # sigma_dd
        wvb[1:4] += rhoR[0, 1:4] * vsigma[:, 1]  # sigma_ud
        if vlapl is None:
            wvb[4] = .5 * vtau[:, 1]
            wva[4] = .5 * vtau[:, 0]
        else:
            wva[4] = (.5 * vtau[:, 0] + 2 * vlapl[:, 0])
            wvb[4] = (.5 * vtau[:, 1] + 2 * vlapl[:, 1])

    nelec[0] += rhoR[0, 0].sum() * weight
    nelec[1] += rhoR[1, 0].sum() * weight
    excsum += (rhoR[0, 0] * exc).sum() * weight
    excsum += (rhoR[1, 0] * exc).sum() * weight
    wv_freq = tools.fft(numpy.vstack((wva, wvb)), mesh) * weight
    wv_freq = wv_freq.reshape(2, -1, *mesh)
    if j_in_xc:
        # Fold the Coulomb potential into component 0 of the XC potential and
        # add the Coulomb energy to the accumulated energy.
        wv_freq[:, 0] += vG
        vR = tools.ifft(vG.reshape(-1, ngrids), mesh)
        ecoul = numpy.einsum('ng,ng->', rhoR[:, 0].real, vR.real) * .5
        log.debug('Coulomb energy %s', ecoul)
        excsum += ecoul
    rhoR = rhoG = None

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    if gamma_point(kpts_band):
        veff = numpy.zeros((2, nkpts, nao, nao))
        vj = numpy.zeros((2, nkpts, nao, nao))
    else:
        veff = numpy.zeros((2, nkpts, nao, nao), dtype=numpy.complex128)
        vj = numpy.zeros((2, nkpts, nao, nao), dtype=numpy.complex128)

    for grids_high, grids_low in mydf.tasks:
        cell_high = grids_high.cell
        mesh = grids_high.mesh
        coords_idx = grids_high.coords_idx
        ngrids0 = numpy.prod(mesh)
        ngrids1 = grids_high.coords.shape[0]
        log.debug('mesh %s, ngrids %s/%s', mesh, ngrids1, ngrids0)

        # Integer frequency indices of this task's mesh, used to pick the
        # matching entries out of the full-mesh arrays.
        gx = numpy.fft.fftfreq(mesh[0], 1. / mesh[0]).astype(int)
        gy = numpy.fft.fftfreq(mesh[1], 1. / mesh[1]).astype(int)
        gz = numpy.fft.fftfreq(mesh[2], 1. / mesh[2]).astype(int)
        sub_wvG = wv_freq[:, :, gx[:, None, None], gy[:, None], gz].reshape(-1, ngrids0)
        wv = tools.ifft(sub_wvG, mesh).real.reshape(2, -1, ngrids0)
        wv = wv[:, :, coords_idx]
        if with_j:
            sub_vG = vG[:, gx[:, None, None], gy[:, None], gz].reshape(-1, ngrids0)
            vR = tools.ifft(sub_vG, mesh).real.reshape(2, ngrids0)
            vR = vR[:, coords_idx]

        idx_h = grids_high.ao_idx
        if grids_low is None:
            for ao_h_etc, p0, p1 in mydf.aoR_loop(grids_high, kpts, deriv):
                ao_h = ao_h_etc[0]
                add_xc_(veff, ao_h, ao_h, idx_h, idx_h, wv[:, :, p0:p1])
                if with_j:
                    add_j_(vj, ao_h, ao_h, idx_h, idx_h, vR[:, p0:p1])
                ao_h = ao_h_etc = None
        else:
            idx_l = grids_low.ao_idx
            for ao_h_etc, ao_l_etc in zip(
                    mydf.aoR_loop(grids_high, kpts, deriv),
                    mydf.aoR_loop(grids_low, kpts, deriv)):
                p0, p1 = ao_h_etc[1:3]
                ao_h = ao_h_etc[0][0]
                ao_l = ao_l_etc[0][0]
                # high-high, high-low and low-high blocks; no low-low call is
                # made here.
                add_xc_(veff, ao_h, ao_h, idx_h, idx_h, wv[:, :, p0:p1])
                add_xc_(veff, ao_h, ao_l, idx_h, idx_l, wv[:, :, p0:p1])
                add_xc_(veff, ao_l, ao_h, idx_l, idx_h, wv[:, :, p0:p1])
                if with_j:
                    add_j_(vj, ao_h, ao_h, idx_h, idx_h, vR[:, p0:p1])
                    add_j_(vj, ao_h, ao_l, idx_h, idx_l, vR[:, p0:p1])
                    add_j_(vj, ao_l, ao_h, idx_l, idx_h, vR[:, p0:p1])
                ao_h = ao_l = ao_h_etc = ao_l_etc = None

    vj = _format_jks(vj, dm_kpts, input_band, kpts)
    veff = _format_jks(veff, dm_kpts, input_band, kpts)
    return nelec, excsum, veff, vj
def get_j_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1, 3)), kpts_band=None):
    '''Derivatives of Coulomb (J) AO matrix at sampled k-points.

    The Coulomb potential on the grid is built via FFT (real-valued path at
    the Gamma point, complex path otherwise) and then contracted with the AO
    values (``ao[0]``) on one side and the conjugated AO derivative components
    (``ao[1:]``) on the other, with a minus sign.

    Args:
        mydf : object providing ``cell``, ``mesh``, ``_numint``, ``grids`` and
            ``aoR_loop``.
        dm_kpts : density matrices; normalised by ``_format_dms``.
        kpts : k-points associated with ``dm_kpts``.
        kpts_band : optional band k-points.

    Returns:
        ndarray stacking, for each of the three components, the output of
        ``_format_jks``.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    ni = mydf._numint

    make_rho, nset, nao = ni._gen_rho_evaluator(cell, dm_kpts, hermi=1)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    coulG = tools.get_coulG(cell, mesh=mesh)
    ngrids = len(coulG)

    if gamma_point(kpts):
        rhoR = np.zeros((nset, ngrids))
        for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts):
            ao_ks = blk[0]
            mask = blk[2]
            for idm in range(nset):
                rhoR[idm, g0:g1] += make_rho(idm, ao_ks, mask, 'LDA')
            ao_ks = None

        # The potential overwrites the density buffer row by row.
        vR = rhoR
        for idm in range(nset):
            vG = coulG * tools.fft(rhoR[idm], mesh)
            vR[idm] = tools.ifft(vG, mesh).real
    else:
        # vR may be complex if the underlying density is complex
        rhoR = np.zeros((nset, ngrids), dtype=np.complex128)
        for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts):
            ao_ks = blk[0]
            mask = blk[2]
            for idm in range(nset):
                for k, ao in enumerate(ao_ks):
                    ao_dm = lib.dot(ao, dms[idm, k])
                    rhoR[idm, g0:g1] += np.einsum('xi,xi->x', ao_dm, ao.conj())
        rhoR *= 1. / nkpts

        vR = rhoR
        for idm in range(nset):
            vG = coulG * tools.fft(rhoR[idm], mesh)
            vR[idm] = tools.ifft(vG, mesh)

    input_band = kpts_band
    kpts_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    vR *= cell.vol / ngrids

    out_shape = (3, nset, nband, nao, nao)
    if gamma_point(kpts_band):
        vj_kpts = np.zeros(out_shape)
    else:
        vj_kpts = np.zeros(out_shape, dtype=np.complex128)

    for blk, g0, g1 in mydf.aoR_loop(mydf.grids, kpts_band, deriv=1):
        ao_ks = blk[0]
        mask = blk[2]
        for idm in range(nset):
            # ni.eval_mat can handle real vR only, so the contraction is
            # written out explicitly.
            pot = vR[idm, g0:g1]
            for k, ao in enumerate(ao_ks):
                weighted_ao = np.einsum('xi,x->xi', ao[0], pot)
                vj_kpts[:, idm, k] -= lib.einsum('axi,xj->aij',
                                                 ao[1:].conj(), weighted_ao)

    components = [_format_jks(comp, dm_kpts, input_band, kpts)
                  for comp in vj_kpts]
    vj_kpts = np.asarray(components)
    return vj_kpts
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Exchange (K) matrices at the k-points ``kpts_band`` (default: ``kpts``).

    Pairs (band k-point, density k-point) are grouped by their momentum
    difference; for each group the AO-pair Fourier transforms delivered by
    ``mydf.ft_loop`` are contracted with the density matrices and the
    weighted Coulomb kernel.

    Args:
        dm_kpts : density matrices, normalised through ``_format_dms``.
        hermi : not read by this function.
        kpts : k-points the density matrices belong to.
        kpts_band : optional band k-points; when None the (ki, kj)/(kj, ki)
            symmetry is used to fill two blocks per pair.
        exxdiv : stored on ``mydf.exxdiv`` before the Coulomb kernel is built;
            the value 'ewald' additionally triggers the G=0 correction for
            low-dimensional cells.

    Returns:
        The result of ``_format_jks`` applied to the accumulated K matrices.
    '''
    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Use the logger module's clock
    # helpers when it provides them, otherwise fall back to the time module.
    cpu_clock = (getattr(logger, 'process_clock', None) or
                 getattr(time, 'process_time', None) or time.clock)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())

    mesh = mydf.mesh
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    swap_2e = (kpts_band is None)
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            # the transposed pairs are filled by "case 2" below
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)

        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mesh)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt, kptjs,
                                           max_memory=max_memory1):
            nG = p1 - p0
            bufR = numpy.empty((nG*nao**2))
            bufI = numpy.empty((nG*nao**2))
            buf1R = numpy.empty((nG*nao**2))
            buf1I = numpy.empty((nG*nao**2))

            for k, aoao in enumerate(aoaoks):
                ki = kpti_idx[k]
                kj = kptj_idx[k]

                # case 1: k_pq = (pi|iq)
                #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
                #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
                pLqR = numpy.ndarray((nao,nG,nao), buffer=bufR)
                pLqI = numpy.ndarray((nao,nG,nao), buffer=bufI)
                pLqR[:] = aoao.real.reshape(nG,nao,nao).transpose(1,0,2)
                pLqI[:] = aoao.imag.reshape(nG,nao,nao).transpose(1,0,2)
                iLkR = numpy.ndarray((nao,nG,nao), buffer=buf1R)
                iLkI = numpy.ndarray((nao,nG,nao), buffer=buf1I)
                for i in range(nset):
                    zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                           dmsR[i,kj], dmsI[i,kj], 1,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                           pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                           1, vkR[i,ki], vkI[i,ki], 1)

                # case 2: k_pq = (iq|pi)
                #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
                #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
                if swap_2e and not is_zero(kpt):
                    for i in range(nset):
                        zdotNN(dmsR[i,ki], dmsI[i,ki],
                               pLqR.reshape(nao,-1), pLqI.reshape(nao,-1), 1,
                               iLkR.reshape(nao,-1), iLkI.reshape(nao,-1))
                        iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                        iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                        zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                               iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                               1, vkR[i,kj], vkI[i,kj], 1)

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)
        t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)'%ki, *t1)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
        not numpy.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j
    vk_kpts *= 1./nkpts

    # Add ewald_exxdiv contribution because G=0 was not included in the
    # non-uniform grids
    if (exxdiv == 'ewald' and
        (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
         (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
        # NOTE(review): kpts_band is passed both as the 2nd and the 5th
        # argument although dms is laid out along kpts -- confirm against the
        # signature of _ewald_exxdiv_for_G0.
        _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None,
               exxdiv=None):
    '''Exchange (K) matrices: contribution from ``mydf.sr_loop`` tensors added
    on top of ``pwdf_jk.get_k_kpts``.

    Args:
        dm_kpts : density matrices, normalised through ``_format_dms``.
        hermi : when true, the second contraction is replaced by adding the
            Hermitian conjugate of the first one.
        kpts : k-points the density matrices belong to.
        kpt_band : optional band k-point(s); reshaped to (-1, 3).
        exxdiv : forwarded unchanged to ``pwdf_jk.get_k_kpts``.

    Returns:
        The array returned by ``pwdf_jk.get_k_kpts`` with the contribution
        computed here added in place.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Use the logger module's clock
    # helpers when it provides them, otherwise fall back to the time module.
    cpu_clock = (getattr(logger, 'process_clock', None) or
                 getattr(time, 'process_time', None) or time.clock)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_k_kpts', *t1)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    if kpt_band is None:
        kpts_band = kpts
        swap_2e = True
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
        # Band k-points differ from the density k-points, so the
        # (ki,kj) <-> (kj,ki) shortcut does not apply.  Without this
        # assignment make_kpt raised NameError whenever kpt_band was given.
        swap_2e = False
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx,kpti_idx] = False

        # Note: kj-ki for electron 1 and ki-kj for electron 2
        # j2c ~ ({kj-ki}|{ks-kr}) ~ ({kj-ki}|-{kj-ki}) ~ ({kj-ki}|{ki-kj})
        # j3c ~ (Q|kj,ki) = j3c{ji} = (Q|ki,kj)* = conj(transpose(j3c{ij}, (0,2,1)))

        bufR = numpy.empty((mydf.blockdim*nao**2))
        bufI = numpy.empty((mydf.blockdim*nao**2))
        for ki,kj in zip(kpti_idx,kptj_idx):
            kpti = kpts_band[ki]
            kptj = kpts[kj]
            kptij = numpy.asarray((kpti,kptj))
            for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptij, max_memory, False):
                nrow = LpqR.shape[0]
                # The transposed tensors reuse existing storage: pLq* live in
                # bufR/bufI, pjq* overwrite the Lpq* blocks and tmp* overwrite
                # the j3c* blocks.  Statement order below matters.
                pLqR = numpy.ndarray((nao,nrow,nao), buffer=bufR)
                pLqI = numpy.ndarray((nao,nrow,nao), buffer=bufI)
                pjqR = numpy.ndarray((nao,nrow,nao), buffer=LpqR)
                pjqI = numpy.ndarray((nao,nrow,nao), buffer=LpqI)
                tmpR = numpy.ndarray((nao,nrow*nao), buffer=j3cR)
                tmpI = numpy.ndarray((nao,nrow*nao), buffer=j3cI)
                pLqR[:] = LpqR.reshape(-1,nao,nao).transpose(1,0,2)
                pLqI[:] = LpqI.reshape(-1,nao,nao).transpose(1,0,2)
                pjqR[:] = j3cR.reshape(-1,nao,nao).transpose(1,0,2)
                pjqI[:] = j3cI.reshape(-1,nao,nao).transpose(1,0,2)

                #:Lpq = LpqR + LpqI*1j
                #:j3c = j3cR + j3cI*1j
                #:for i in range(nset):
                #:    dm = dms[i,ki]
                #:    tmp = numpy.dot(dm, j3c.reshape(nao,-1))
                #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
                #:    tmp = numpy.dot(dm, Lpq.reshape(nao,-1))
                #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
                #:    vkR[i,kj] += vk1.real
                #:    vkI[i,kj] += vk1.imag

                #:if swap_2e and not is_zero(kpt):
                #:    # K ~ 'Lij,Llk*,jk->il' + 'Llk*,Lij,jk->il'
                #:    for i in range(nset):
                #:        dm = dms[i,kj]
                #:        tmp = numpy.dot(j3c.reshape(-1,nao), dm)
                #:        vk1 = numpy.dot(tmp.reshape(nao,-1), Lpq.reshape(nao,-1).conj().T)
                #:        tmp = numpy.dot(Lpq.reshape(-1,nao), dm)
                #:        vk1+= numpy.dot(tmp.reshape(nao,-1), j3c.reshape(nao,-1).conj().T)
                #:        vkR[i,ki] += vk1.real
                #:        vkI[i,ki] += vk1.imag

                # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
                for i in range(nset):
                    tmpR, tmpI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                        pjqR.reshape(nao,-1),
                                        pjqI.reshape(nao,-1), 1, tmpR, tmpI)
                    vk1R, vk1I = zdotCN(pLqR.reshape(-1,nao).T,
                                        pLqI.reshape(-1,nao).T,
                                        tmpR.reshape(-1,nao),
                                        tmpI.reshape(-1,nao))
                    vkR[i,kj] += vk1R
                    vkI[i,kj] += vk1I
                    if hermi:
                        vkR[i,kj] += vk1R.T
                        vkI[i,kj] -= vk1I.T
                    else:
                        tmpR, tmpI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                            pLqR.reshape(nao,-1),
                                            pLqI.reshape(nao,-1), 1, tmpR, tmpI)
                        zdotCN(pjqR.reshape(-1,nao).T, pjqI.reshape(-1,nao).T,
                               tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                               1, vkR[i,kj], vkI[i,kj], 1)

                if swap_2e and not is_zero(kpt):
                    tmpR = tmpR.reshape(nao*nrow,nao)
                    tmpI = tmpI.reshape(nao*nrow,nao)
                    # K ~ 'iLj,lLk*,jk->il' + 'lLk*,iLj,jk->il'
                    for i in range(nset):
                        tmpR, tmpI = zdotNN(pjqR.reshape(-1,nao),
                                            pjqI.reshape(-1,nao),
                                            dmsR[i,kj], dmsI[i,kj], 1,
                                            tmpR, tmpI)
                        vk1R, vk1I = zdotNC(tmpR.reshape(nao,-1),
                                            tmpI.reshape(nao,-1),
                                            pLqR.reshape(nao,-1).T,
                                            pLqI.reshape(nao,-1).T)
                        vkR[i,ki] += vk1R
                        vkI[i,ki] += vk1I
                        if hermi:
                            vkR[i,ki] += vk1R.T
                            vkI[i,ki] -= vk1I.T
                        else:
                            tmpR, tmpI = zdotNN(pLqR.reshape(-1,nao),
                                                pLqI.reshape(-1,nao),
                                                dmsR[i,kj], dmsI[i,kj], 1,
                                                tmpR, tmpI)
                            zdotNC(tmpR.reshape(nao,-1), tmpI.reshape(nao,-1),
                                   pjqR.reshape(nao,-1).T,
                                   pjqI.reshape(nao,-1).T,
                                   1, vkR[i,ki], vkI[i,ki], 1)
                # drop the references to this block before the next one loads
                LpqR = LpqI = j3cR = j3cI = tmpR = tmpI = None
        return None

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)

    vkR *= 1./nkpts
    vkI *= 1./nkpts

    vk_kpts = pwdf_jk.get_k_kpts(mydf, dm_kpts, hermi, kpts, kpt_band, exxdiv)
    if (gamma_point(kpts) and gamma_point(kpts_band) and
        not numpy.iscomplexobj(dm_kpts)):
        vk_kpts += vkR.reshape(vk_kpts.shape)
    else:
        vk_kpts += (vkR+vkI*1j).reshape(vk_kpts.shape)
    return vk_kpts
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None,
               exxdiv=None):
    '''Get the exchange (K) AO matrices at sampled k-points.

    Args:
        dm_kpts : (nkpts, nao, nao) ndarray
            Density matrix at each k-point.  If it carries ``mo_coeff`` /
            ``mo_occ`` attributes and there is a single density-matrix set,
            occupied orbitals are used in place of the density matrix.
        hermi : not read by this function.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        kpts_band : (3,) ndarray or (*,3) ndarray
            A list of arbitrary "band" k-points at which to evaluate the
            matrix.
        exxdiv : stored on ``mydf.exxdiv`` before the Coulomb kernel is
            requested from ``tools.get_coulG``.

    Returns:
        vk : the accumulated K matrices after ``_format_jks``
        (only K is returned; no J matrix is built here).
    '''
    cell = mydf.cell
    gs = mydf.gs
    coords = cell.gen_uniform_grids(gs)
    ngs = coords.shape[0]

    if hasattr(dm_kpts, 'mo_coeff'):
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngs)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128)

    # AO values are stored transposed: (nao, ngs) per k-point.
    ao2_kpts = mydf._numint.eval_ao(cell, coords, kpts, non0tab=mydf.non0tab)
    ao2_kpts = [np.asarray(ao.T, order='C') for ao in ao2_kpts]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = mydf._numint.eval_ao(cell, coords, kpts_band,
                                        non0tab=mydf.non0tab)
        ao1_kpts = [np.asarray(ao.T, order='C') for ao in ao1_kpts]

    if mo_coeff is not None and nset == 1:
        # Fold sqrt(occ) into the occupied orbitals and work in that basis
        # on the "2" side (ao1_kpts keeps referring to the plain AOs).
        mo_coeff = [mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
                    for k, occ in enumerate(mo_occ)]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]
        naoj = ao2_kpts[0].shape[0]
    else:
        naoj = nao

    max_memory = mydf.max_memory - lib.current_memory()[0]
    # The block loop below walks range(0, nao, blksize), so more than nao
    # rows are never used; cap the size to avoid an oversized work buffer.
    blksize = int(min(nao, max(max_memory * 1e6 / 16 / 2 / ngs / nao, 1)))
    ao1_dtype = np.result_type(*ao1_kpts)
    ao2_dtype = np.result_type(*ao2_kpts)
    buf = np.empty((blksize, naoj, ngs),
                   dtype=np.result_type(ao1_dtype, ao2_dtype))
    vR_dm = np.empty((nset, nao, ngs), dtype=vk_kpts.dtype)
    ao_dms = np.empty((nset, naoj, ngs), dtype=np.result_type(dms, ao2_dtype))

    for k2, ao2T in enumerate(ao2_kpts):
        kpt2 = kpts[k2]
        if mo_coeff is None or nset > 1:
            for i in range(nset):
                lib.dot(dms[i, k2], ao2T.conj(), c=ao_dms[i])
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]
            mydf.exxdiv = exxdiv
            coulG = tools.get_coulG(cell, kpt2 - kpt1, True, mydf, gs)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr,
                                 ao2T, out=buf[:p1 - p0])
                vG = tools.fft(rho1.reshape(-1, ngs), gs)
                vG *= coulG
                vR = tools.ifft(vG, gs).reshape(p1 - p0, naoj, ngs)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('ijg,jg->ig', vR, ao_dms[i],
                              out=vR_dm[i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    When ``kpt_band`` is given and differs from ``kpt`` the work is delegated
    to ``get_j_kpts`` / ``get_k_kpts``.  Otherwise the contribution built
    from the ``mydf.sr_loop`` tensors is added to the result of
    ``pwdf_jk.get_jk``.

    Args:
        dm : density matrix (or several); results are reshaped to ``dm.shape``.
        hermi : when true, the second K contraction is replaced by adding the
            Hermitian conjugate of the first.
        with_j, with_k : select which of the two matrices are built.
        exxdiv : forwarded to ``get_k_kpts`` / ``pwdf_jk.get_jk``.

    Returns:
        (vj, vk); an entry is None when the matching ``with_*`` flag is false
        on the band path.
    '''
    vj = vk = None
    if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, [dm], hermi, kpt, kpt_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, [dm], hermi, kpt, kpt_band)
        return vj, vk

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Use the logger module's clock
    # helpers when it provides them, otherwise fall back to the time module.
    cpu_clock = (getattr(logger, 'process_clock', None) or
                 getattr(time, 'process_time', None) or time.clock)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t2 = t1 = (cpu_clock(), wall_clock())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_jk', *t1)

    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)
    kptii = numpy.asarray((kpt,kpt))

    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    if with_j:
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    # .45 is estimation for the memory usage ratio sr_loop / (sr_loop+bufR+bufI)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0])) * .45
    if with_k:
        buf1R = numpy.empty((mydf.blockdim*nao**2))
        buf2R = numpy.empty((mydf.blockdim*nao**2))
        buf3R = numpy.empty((mydf.blockdim*nao**2))
        if not k_real:
            buf1I = numpy.empty((mydf.blockdim*nao**2))
            buf2I = numpy.empty((mydf.blockdim*nao**2))
            buf3I = numpy.empty((mydf.blockdim*nao**2))

    def contract_k(pLqR, pLqI, pjqR, pjqI):
        # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
        #:Lpq = LpqR + LpqI*1j
        #:j3c = j3cR + j3cI*1j
        #:for i in range(nset):
        #:    tmp = numpy.dot(dms[i], j3c.reshape(nao,-1))
        #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
        #:    tmp = numpy.dot(dms[i], Lpq.reshape(nao,-1))
        #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
        #:    vkR[i] += vk1.real
        #:    vkI[i] += vk1.imag
        nrow = pLqR.shape[1]
        tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf3R)
        if k_real:
            for i in range(nset):
                tmpR = lib.ddot(dmsR[i], pjqR.reshape(nao,-1), 1, tmpR)
                vk1R = lib.ddot(pLqR.reshape(-1,nao).T, tmpR.reshape(-1,nao))
                vkR[i] += vk1R
                if hermi:
                    vkR[i] += vk1R.T
                else:
                    tmpR = lib.ddot(dmsR[i], pLqR.reshape(nao,-1), 1, tmpR)
                    lib.ddot(pjqR.reshape(-1,nao).T, tmpR.reshape(-1,nao),
                             1, vkR[i], 1)
        else:
            tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf3I)
            for i in range(nset):
                tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pjqR.reshape(nao,-1),
                                    pjqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
                vk1R, vk1I = zdotCN(pLqR.reshape(-1,nao).T,
                                    pLqI.reshape(-1,nao).T,
                                    tmpR.reshape(-1,nao), tmpI.reshape(-1,nao))
                vkR[i] += vk1R
                vkI[i] += vk1I
                if hermi:
                    vkR[i] += vk1R.T
                    vkI[i] -= vk1I.T
                else:
                    tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
                    zdotCN(pjqR.reshape(-1,nao).T, pjqI.reshape(-1,nao).T,
                           tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                           1, vkR[i], vkI[i], 1)

    # Placeholders handed to contract_k when K is real (the imaginary
    # buffers are only created for complex K).
    pLqI = pjqI = None
    thread_k = None
    for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, False):
        LpqR = LpqR.reshape(-1,nao,nao)
        LpqI = LpqI.reshape(-1,nao,nao)
        j3cR = j3cR.reshape(-1,nao,nao)
        j3cI = j3cI.reshape(-1,nao,nao)
        t2 = log.timer_debug1(' load', *t2)
        # The previous K contraction reads buf1*/buf2*; wait for it before
        # those buffers are refilled below.
        if thread_k is not None:
            thread_k.join()
        if with_j:
            #:rho_coeff = numpy.einsum('Lpq,xqp->xL', Lpq, dms)
            #:jaux = numpy.einsum('Lpq,xqp->xL', j3c, dms)
            #:vj += numpy.dot(jaux, Lpq.reshape(-1,nao**2))
            #:vj += numpy.dot(rho_coeff, j3c.reshape(-1,nao**2))
            rhoR  = numpy.einsum('Lpq,xqp->xL', LpqR, dmsR)
            jauxR = numpy.einsum('Lpq,xqp->xL', j3cR, dmsR)
            if not j_real:
                rhoR -= numpy.einsum('Lpq,xqp->xL', LpqI, dmsI)
                rhoI  = numpy.einsum('Lpq,xqp->xL', LpqR, dmsI)
                rhoI += numpy.einsum('Lpq,xqp->xL', LpqI, dmsR)
                jauxR-= numpy.einsum('Lpq,xqp->xL', j3cI, dmsI)
                jauxI = numpy.einsum('Lpq,xqp->xL', j3cR, dmsI)
                jauxI+= numpy.einsum('Lpq,xqp->xL', j3cI, dmsR)
            vjR += numpy.einsum('xL,Lpq->xpq', jauxR, LpqR)
            vjR += numpy.einsum('xL,Lpq->xpq', rhoR, j3cR)
            if not j_real:
                vjR -= numpy.einsum('xL,Lpq->xpq', jauxI, LpqI)
                vjR -= numpy.einsum('xL,Lpq->xpq', rhoI, j3cI)
                vjI += numpy.einsum('xL,Lpq->xpq', jauxR, LpqI)
                vjI += numpy.einsum('xL,Lpq->xpq', jauxI, LpqR)
                vjI += numpy.einsum('xL,Lpq->xpq', rhoR, j3cI)
                vjI += numpy.einsum('xL,Lpq->xpq', rhoI, j3cR)
        t2 = log.timer_debug1(' with_j', *t2)

        if with_k:
            nrow = LpqR.shape[0]
            pLqR = numpy.ndarray((nao,nrow,nao), buffer=buf1R)
            pjqR = numpy.ndarray((nao,nrow,nao), buffer=buf2R)
            pLqR[:] = LpqR.transpose(1,0,2)
            pjqR[:] = j3cR.transpose(1,0,2)
            if not k_real:
                pLqI = numpy.ndarray((nao,nrow,nao), buffer=buf1I)
                pjqI = numpy.ndarray((nao,nrow,nao), buffer=buf2I)
                pLqI[:] = LpqI.transpose(1,0,2)
                pjqI[:] = j3cI.transpose(1,0,2)

            thread_k = lib.background_thread(contract_k, pLqR, pLqI, pjqR, pjqI)
            t2 = log.timer_debug1(' with_k', *t2)
        LpqR = LpqI = j3cR = j3cI = None
    if thread_k is not None:
        thread_k.join()
    thread_k = None
    t1 = log.timer_debug1('mdf_jk.get_jk pass 1', *t1)

    vj, vk = pwdf_jk.get_jk(mydf, dm, hermi, kpt, kpt_band, with_j, with_k, exxdiv)
    if with_j:
        if j_real:
            vj += vjR.reshape(dm.shape)
        else:
            vj += (vjR+vjI*1j).reshape(dm.shape)
    if with_k:
        if k_real:
            vk += vkR.reshape(dm.shape)
        else:
            vk += (vkR+vkI*1j).reshape(dm.shape)
    return vj, vk
def _eval_rhoG(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1, 3)), deriv=0):
    '''Accumulate the density (and derivative components) in reciprocal space
    over the multigrid tasks of ``mydf``.

    For every (high, low) grid pair in ``mydf.tasks`` the density is
    evaluated on the task's grid points, scattered onto that task's full
    mesh, Fourier transformed and added to the matching frequencies of the
    ``cell.mesh`` sized accumulator.

    Args:
        dm_kpts : density matrices, normalised through ``_format_dms``; they
            must be Hermitian to within 1e-9.
        hermi : forwarded to ``numint.eval_rho``.
        kpts : k-points the density matrices belong to.
        deriv : 0, 1 or 2, selecting 'LDA' (1 component), 'GGA' (4) or
            'MGGA' (6) style output.

    Returns:
        complex ndarray of shape (nset, rhodim, nx*ny*nz) with
        ``nx, ny, nz = cell.mesh``.

    Raises:
        NotImplementedError: if the density matrices are not Hermitian.
    '''
    log = lib.logger.Logger(mydf.stdout, mydf.verbose)
    cell = mydf.cell

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    tasks = getattr(mydf, 'tasks', None)
    if tasks is None:
        # build the task list once and cache it on mydf
        mydf.tasks = tasks = multi_grids_tasks(cell, log)
        log.debug('Multigrid ntasks %s', len(tasks))

    assert (deriv <= 2)
    if abs(dms - dms.transpose(0, 1, 3, 2).conj()).max() < 1e-9:
        def dot_bra(bra, aodm):
            # real part of sum_i bra[p,i] * conj(aodm[p,i])
            rho = numpy.einsum('pi,pi->p', bra.real, aodm.real)
            # numpy.complex was removed in NumPy 1.24; complex128 is the
            # dtype that alias compared equal to.
            if aodm.dtype == numpy.complex128:
                rho += numpy.einsum('pi,pi->p', bra.imag, aodm.imag)
            return rho

        if deriv == 0:
            xctype = 'LDA'
            rhodim = 1

            def make_rho(ao_l, ao_h, dm_lh, dm_hl):
                c0 = lib.dot(ao_l, dm_lh)
                rho = dot_bra(ao_h, c0)
                return rho * 2

        elif deriv == 1:
            xctype = 'GGA'
            rhodim = 4

            def make_rho(ao_l, ao_h, dm_lh, dm_hl):
                ngrids = ao_l[0].shape[0]
                rho = numpy.empty((4, ngrids))
                c0 = lib.dot(ao_l[0], dm_lh)
                rho[0] = dot_bra(ao_h[0], c0)
                for i in range(1, 4):
                    rho[i] = dot_bra(ao_h[i], c0)
                c0 = lib.dot(ao_h[0], dm_hl)
                for i in range(1, 4):
                    rho[i] += dot_bra(ao_l[i], c0)
                return rho * 2  # *2 for dm_lh+dm_hl.T

        elif deriv == 2:
            xctype = 'MGGA'
            rhodim = 6

            def make_rho(ao_l, ao_h, dm_lh, dm_hl):
                ngrids = ao_l[0].shape[0]
                rho = numpy.empty((6, ngrids))
                c = [lib.dot(ao_l[i], dm_lh) for i in range(4)]
                rho[0] = dot_bra(ao_h[0], c[0])
                rho[5] = 0
                for i in range(1, 4):
                    rho[i] = dot_bra(ao_h[i], c[0])
                    rho[i] += dot_bra(ao_h[0], c[i])
                    rho[5] += dot_bra(ao_h[i], c[i]) * 2
                XX, YY, ZZ = 4, 7, 9
                ao2 = ao_h[XX] + ao_h[YY] + ao_h[ZZ]
                rho[4] = dot_bra(ao2, c[0])
                ao2 = lib.dot(ao_l[XX] + ao_l[YY] + ao_l[ZZ], dm_lh)
                rho[4] += dot_bra(ao2, ao_h[0])
                rho[4] += rho[5] * 2
                rho[5] *= .5
                return rho * 2  # *2 for dm_lh+dm_hl.T
    else:
        raise NotImplementedError('Non-hermitian density matrices')

    ni = mydf._numint
    nx, ny, nz = cell.mesh
    rhoG = numpy.zeros((nset * rhodim, nx, ny, nz), dtype=numpy.complex128)
    for grids_high, grids_low in tasks:
        cell_high = grids_high.cell
        mesh = grids_high.mesh
        coords_idx = grids_high.coords_idx
        ngrids0 = numpy.prod(mesh)
        ngrids1 = grids_high.coords.shape[0]
        log.debug('mesh %s, ngrids %s/%s', mesh, ngrids1, ngrids0)

        idx_h = grids_high.ao_idx
        dms_hh = numpy.asarray(dms[:, :, idx_h[:, None], idx_h], order='C')
        if grids_low is not None:
            idx_l = grids_low.ao_idx
            dms_hl = numpy.asarray(dms[:, :, idx_h[:, None], idx_l], order='C')
            dms_lh = numpy.asarray(dms[:, :, idx_l[:, None], idx_h], order='C')

        rho = numpy.zeros((nset, rhodim, ngrids1))
        if grids_low is None:
            for ao_h_etc, p0, p1 in mydf.aoR_loop(grids_high, kpts, deriv):
                ao_h, mask = ao_h_etc[0], ao_h_etc[2]
                for k in range(nkpts):
                    for i in range(nset):
                        rho_sub = numint.eval_rho(cell_high, ao_h[k],
                                                  dms_hh[i, k], mask,
                                                  xctype, hermi)
                        rho[i, :, p0:p1] += rho_sub.real
                ao_h = ao_h_etc = None
        else:
            for ao_h_etc, ao_l_etc in zip(
                    mydf.aoR_loop(grids_high, kpts, deriv),
                    mydf.aoR_loop(grids_low, kpts, deriv)):
                p0, p1 = ao_h_etc[1:3]
                ao_h, mask = ao_h_etc[0][0], ao_h_etc[0][2]
                ao_l = ao_l_etc[0][0]
                for k in range(nkpts):
                    for i in range(nset):
                        # high-high block
                        rho_sub = numint.eval_rho(cell_high, ao_h[k],
                                                  dms_hh[i, k], mask,
                                                  xctype, hermi)
                        rho[i, :, p0:p1] += rho_sub.real
                        # low-high cross terms
                        rho_sub = make_rho(ao_l[k], ao_h[k],
                                           dms_lh[i, k], dms_hl[i, k])
                        rho[i, :, p0:p1] += rho_sub.real
                ao_h = ao_l = ao_h_etc = ao_l_etc = None

        rho *= 1. / nkpts
        rhoR = numpy.zeros((nset * rhodim, ngrids0))
        rhoR[:, coords_idx] = rho.reshape(nset * rhodim, ngrids1)
        # Integer frequencies of this task's mesh; negative values index the
        # accumulator from its end.
        gx = numpy.fft.fftfreq(mesh[0], 1. / mesh[0]).astype(int)
        gy = numpy.fft.fftfreq(mesh[1], 1. / mesh[1]).astype(int)
        gz = numpy.fft.fftfreq(mesh[2], 1. / mesh[2]).astype(int)
        rho_freq = tools.fft(rhoR, mesh) * cell.vol / ngrids0
        for i in range(nset * rhodim):
            rhoG[i, gx[:, None, None], gy[:, None], gz] += \
                rho_freq[i].reshape(mesh)

    # rhoG is allocated on cell.mesh; size the last axis from that mesh
    # rather than from whichever task happened to run last.
    return rhoG.reshape(nset, rhodim, nx * ny * nz)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpts_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    When ``kpts_band`` is given and differs from ``kpt`` the work is
    delegated to ``get_j_kpts`` / ``get_k_kpts``.  Otherwise J and K are
    accumulated from the AO-pair blocks delivered by ``mydf.pw_loop``.

    Args:
        dm : density matrix (or several); results are reshaped to ``dm.shape``.
        hermi : only forwarded on the band path.
        with_j, with_k : select which of the two matrices are built.
        exxdiv : passed to ``mydf.weighted_coulG`` for the K kernel; the
            value 'ewald' additionally triggers the G=0 correction for
            low-dimensional cells.

    Returns:
        (vj, vk); an entry is None when the matching ``with_*`` flag is false.
    '''
    vj = vk = None
    if kpts_band is not None and abs(kpt-kpts_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, dm, hermi, kpt, kpts_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, dm, hermi, kpt, kpts_band)
        return vj, vk

    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = (logger.process_clock(), logger.perf_counter())
    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)

    mesh = mydf.mesh
    kptii = numpy.asarray((kpt,kpt))
    kpt_allow = numpy.zeros(3)

    if with_j:
        vjcoulG = mydf.weighted_coulG(kpt_allow, False, mesh)
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        vkcoulG = mydf.weighted_coulG(kpt_allow, exxdiv, mesh)
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    t2 = t1

    # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs))
    #                 == conj(transpose(rho_sr(G+k_sr), (0,2,1)))
    pLqR = pLqI = None
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptii, max_memory=max_memory):
        t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2)
        pqkR = pqkR.reshape(nao,nao,-1)
        pqkI = pqkI.reshape(nao,nao,-1)
        if with_j:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vj += numpy.einsum('ijkl,lk->ij', v4, dm)
            for i in range(nset):
                rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR)
                rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI)
                rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR)
                rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI)
                rhoR *= vjcoulG[p0:p1]
                rhoI *= vjcoulG[p0:p1]
                vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR)
                vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI)
                if not j_real:
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI)
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR)

        if with_k:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = lib.transpose(pqkR, axes=(0,2,1), out=pLqR).reshape(-1,nao)
            pLqI = lib.transpose(pqkI, axes=(0,2,1), out=pLqI).reshape(-1,nao)
            nG = p1 - p0
            # the untransposed blocks are no longer needed: reuse their
            # memory for the intermediates
            iLkR = numpy.ndarray((nao,nG,nao), buffer=pqkR)
            iLkI = numpy.ndarray((nao,nG,nao), buffer=pqkI)
            for i in range(nset):
                if k_real:
                    lib.dot(pLqR, dmsR[i], 1, iLkR.reshape(nao*nG,nao))
                    lib.dot(pLqI, dmsR[i], 1, iLkI.reshape(nao*nG,nao))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T,
                            1, vkR[i], 1)
                    lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T,
                            1, vkR[i], 1)
                else:
                    zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    # Trailing 1 = accumulate into vkR/vkI, as the real
                    # branch above does; without it each plane-wave block
                    # would replace the sum of the previous ones.
                    zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                           pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                           1, vkR[i], vkI[i], 1)
        pqkR = pqkI = pLqR = pLqI = iLkR = iLkI = None
    t1 = log.timer_debug1('aft_jk.get_jk', *t1)

    if with_j:
        if j_real:
            vj = vjR
        else:
            vj = vjR + vjI * 1j
        vj = vj.reshape(dm.shape)
    if with_k:
        if k_real:
            vk = vkR
        else:
            vk = vkR + vkI * 1j
        # Add ewald_exxdiv contribution because G=0 was not included in the
        # non-uniform grids
        if (exxdiv == 'ewald' and
            (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
             (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
        vk = vk.reshape(dm.shape)
    return vj, vk
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None,
               exxdiv=None):
    '''Exchange (K) matrices at ``kpt_band`` (default: ``kpts``) from the
    AO-pair Fourier transforms delivered by ``mydf.ft_loop``.

    Args:
        dm_kpts : density matrices, normalised through ``_format_dms``.
        hermi : not read by this function.
        kpts : k-points the density matrices belong to.
        kpt_band : optional band k-point(s); reshaped to (-1, 3).
        exxdiv : stored on ``mydf.exxdiv`` before the Coulomb kernel is built;
            must be 'ewald' (any case) or None when ``cell.dimension != 3``.

    Returns:
        ``vk_kpts[0,0]`` or ``vk_kpts[:,0]`` when a single (3,) band k-point
        was given, otherwise the K matrices reshaped to ``dm_kpts.shape``.
    '''
    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Use the logger module's clock
    # helpers when it provides them, otherwise fall back to the time module.
    cpu_clock = (getattr(logger, 'process_clock', None) or
                 getattr(time, 'process_time', None) or time.clock)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    if kpt_band is None:
        kpts_band = kpts
        swap_2e = True
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
        # Band k-points differ from the density k-points, so the
        # (ki,kj) <-> (kj,ki) shortcut does not apply.  Without this
        # assignment make_kpt raised NameError whenever kpt_band was given.
        swap_2e = False
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)
        blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
        bufR = numpy.empty((blksize*nao**2))
        bufI = numpy.empty((blksize*nao**2))
        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        for k, pqkR, pqkI, p0, p1 \
                in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
            ki = kpti_idx[k]
            kj = kptj_idx[k]
            # The kernel is split symmetrically over both AO-pair factors.
            # NOTE(review): numpy.sqrt gives NaN for negative entries --
            # confirm vkcoulG is non-negative for the exxdiv in use.
            coulG = numpy.sqrt(vkcoulG[p0:p1])

            # case 1: k_pq = (pi|iq)
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pqkR *= coulG
            pqkI *= coulG
            pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR)
            pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI)
            iLkR = numpy.empty((nao*(p1-p0),nao))
            iLkI = numpy.empty((nao*(p1-p0),nao))
            for i in range(nset):
                iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                                    dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI)
                zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                       pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                       1, vkR[i,ki], vkI[i,ki], 1)

            # case 2: k_pq = (iq|pi)
            #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                iLkR = iLkR.reshape(nao,-1)
                iLkI = iLkI.reshape(nao,-1)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                        pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, iLkR, iLkI)
                    zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                           1, vkR[i,kj], vkI[i,kj], 1)
            pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
        not numpy.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j
    vk_kpts *= 1./nkpts

    # G=0 was not included in the non-uniform grids
    if cell.dimension != 3 and exxdiv is not None:
        assert(exxdiv.lower() == 'ewald')
        _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts)

    if kpt_band is not None and numpy.shape(kpt_band) == (3,):
        if dm_kpts.ndim == 3:  # One set of dm_kpts for KRHF
            return vk_kpts[0,0]
        else:
            return vk_kpts[:,0]
    else:
        return vk_kpts.reshape(dm_kpts.shape)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None,
           with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    When ``kpt_band`` is given and differs from ``kpt`` the work is delegated
    to ``get_j_kpts`` / ``get_k_kpts``.  Otherwise J and K are accumulated
    from the AO-pair blocks delivered by ``mydf.pw_loop``.

    Args:
        dm : density matrix (or several); results are reshaped to ``dm.shape``.
        hermi : only forwarded on the band path.
        with_j, with_k : select which of the two matrices are built.
        exxdiv : stored on ``mydf.exxdiv`` before the K kernel is built; must
            be 'ewald' (any case) or None when ``cell.dimension != 3``.

    Returns:
        (vj, vk); an entry is None when the matching ``with_*`` flag is false.
    '''
    from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0
    vj = vk = None
    if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, [dm], hermi, kpt, kpt_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, [dm], hermi, kpt, kpt_band)
        return vj, vk

    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Use the logger module's clock
    # helpers when it provides them, otherwise fall back to the time module.
    cpu_clock = (getattr(logger, 'process_clock', None) or
                 getattr(time, 'process_time', None) or time.clock)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = (cpu_clock(), wall_clock())

    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)

    kptii = numpy.asarray((kpt,kpt))
    kpt_allow = numpy.zeros(3)

    if with_j:
        vjcoulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs)
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt_allow, True, mydf.gs)
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    t2 = t1

    # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs))
    #                 == conj(transpose(rho_sr(G+k_sr), (0,2,1)))
    blksize = max(int(max_memory*.25e6/16/nao**2), 16)
    bufR = numpy.empty(blksize*nao**2)
    bufI = numpy.empty(blksize*nao**2)
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, kptii, max_memory=max_memory):
        t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2)
        pqkR = pqkR.reshape(nao,nao,-1)
        pqkI = pqkI.reshape(nao,nao,-1)
        if with_j:
            for i in range(nset):
                rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR)
                rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI)
                rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR)
                rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI)
                rhoR *= vjcoulG[p0:p1]
                rhoI *= vjcoulG[p0:p1]
                vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR)
                vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI)
                if not j_real:
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI)
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR)

        if with_k:
            # J above must see the unscaled blocks; from here on they carry
            # sqrt(kernel) so that the product of two factors is weighted.
            # NOTE(review): numpy.sqrt gives NaN for negative entries --
            # confirm vkcoulG is non-negative for the exxdiv in use.
            coulG = numpy.sqrt(vkcoulG[p0:p1])
            pqkR *= coulG
            pqkI *= coulG
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = lib.transpose(pqkR, axes=(0,2,1), out=bufR).reshape(-1,nao)
            pLqI = lib.transpose(pqkI, axes=(0,2,1), out=bufI).reshape(-1,nao)
            # reuse the memory of the untransposed blocks for intermediates
            iLkR = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkR)
            iLkI = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkI)
            for i in range(nset):
                if k_real:
                    lib.dot(pLqR, dmsR[i], 1, iLkR)
                    lib.dot(pLqI, dmsR[i], 1, iLkI)
                    lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T,
                            1, vkR[i], 1)
                    lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T,
                            1, vkR[i], 1)
                else:
                    zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR, iLkI)
                    # Trailing 1 = accumulate into vkR/vkI, as the real
                    # branch above does; without it each plane-wave block
                    # would replace the sum of the previous ones.
                    zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                           pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                           1, vkR[i], vkI[i], 1)
        pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None
    bufR = bufI = None
    t1 = log.timer_debug1('pwdf_jk.get_jk', *t1)

    if with_j:
        if j_real:
            vj = vjR
        else:
            vj = vjR + vjI * 1j
        vj = vj.reshape(dm.shape)
    if with_k:
        if k_real:
            vk = vkR
        else:
            vk = vkR + vkI * 1j
        if cell.dimension != 3 and exxdiv is not None:
            assert(exxdiv.lower() == 'ewald')
            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
        vk = vk.reshape(dm.shape)
    return vj, vk
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None):
    '''Coulomb (J) matrices from plane-wave pair densities.

    Pass 1 builds the Coulomb-weighted density J(G) from the density
    matrices on ``kpts``; pass 2 contracts J(G) with the pair densities at
    the band k-points.

    Args:
        mydf : object providing ``gs``, ``max_memory``, ``weighted_coulG``
            and ``ft_loop``.
        dm_kpts : density matrices on ``kpts``.
        hermi : unused here; kept for interface compatibility.
        kpts : sampled k-points the density matrices belong to.
        kpt_band : optional k-point(s) at which J is evaluated; defaults
            to ``kpts``.

    Returns:
        J with the shape of ``dm_kpts`` when ``kpt_band`` is not a single
        point; for a single ``kpt_band`` of shape (3,), an (nao,nao) matrix
        (one density set) or an (nset,nao,nao) stack.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t1 = (logger.process_clock(), logger.perf_counter())

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    dmsR = dms.real.reshape(nset,nkpts,nao**2)
    dmsI = dms.imag.reshape(nset,nkpts,nao**2)

    kpt_allow = numpy.zeros(3)
    coulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs)
    ngs = len(coulG)
    vR = numpy.zeros((nset,ngs))
    vI = numpy.zeros((nset,ngs))
    max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8

    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts, max_memory=max_memory):
        for i in range(nset):
            rhoR = numpy.dot(dmsR[i,k], pqkR)
            rhoR+= numpy.dot(dmsI[i,k], pqkI)
            rhoI = numpy.dot(dmsI[i,k], pqkR)
            rhoI-= numpy.dot(dmsR[i,k], pqkI)
            vR[i,p0:p1] += rhoR * coulG[p0:p1]
            vI[i,p0:p1] += rhoI * coulG[p0:p1]
    pqkR = pqkI = coulG = None
    weight = 1./len(kpts)
    vR *= weight
    vI *= weight

    t1 = log.timer_debug1('get_j pass 1 to compute J(G)', *t1)

    if kpt_band is None:
        kpts_band = kpts
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
    # Named is_gamma so the module-level gamma_point() helper is not shadowed.
    is_gamma = abs(kpts_band).sum() < 1e-9
    nband = len(kpts_band)

    vjR = numpy.zeros((nset,nband,nao*nao))
    vjI = numpy.zeros((nset,nband,nao*nao))
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts_band, max_memory=max_memory):
        for i in range(nset):
            vjR[i,k] += numpy.dot(pqkR, vR[i,p0:p1])
            vjR[i,k] -= numpy.dot(pqkI, vI[i,p0:p1])
        if not is_gamma:
            for i in range(nset):
                vjI[i,k] += numpy.dot(pqkI, vR[i,p0:p1])
                vjI[i,k] += numpy.dot(pqkR, vI[i,p0:p1])
    pqkR = pqkI = None

    if is_gamma:
        vj_kpts = vjR
    else:
        vj_kpts = vjR + vjI*1j
    # The accumulators are flat in the AO-pair index; restore the matrix
    # shape so that the single-band return below is (nao,nao), not (nao*nao,).
    vj_kpts = vj_kpts.reshape(nset,nband,nao,nao)
    t1 = log.timer_debug1('get_j pass 2', *t1)

    if kpt_band is not None and numpy.shape(kpt_band) == (3,):
        if dm_kpts.ndim == 3:  # One set of dm_kpts for KRHF
            return vj_kpts[0,0]
        else:
            return vj_kpts[:,0]
    else:
        return vj_kpts.reshape(dm_kpts.shape)
def get_k_e1_kpts(mydf, dm_kpts, kpts=np.zeros((1, 3)), kpts_band=None,
                  exxdiv=None):
    '''Derivatives of exchange (K) AO matrix at sampled k-points.

    Args:
        mydf : FFT density-fitting object providing ``cell``, ``mesh``,
            ``grids``, ``_numint`` and ``max_memory``.
        dm_kpts : density matrices on ``kpts``.  If the object carries
            ``mo_coeff``/``mo_occ`` attributes and holds a single density
            set, the occupied orbitals are used instead of the matrices.
        kpts : sampled k-points.
        kpts_band : optional band k-points; defaults to ``kpts``.
        exxdiv : exchange divergence treatment; stored on ``mydf.exxdiv``.

    Returns:
        Array whose leading axis has length 3 (one entry per first-derivative
        component of the AOs returned by ``eval_ao(..., deriv=1)``), each
        entry formatted by ``_format_jks``.

    Raises:
        NotImplementedError: for ``exxdiv == 'ewald'`` with a non-zero
            ``cell.omega``.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords

    if input_band is None:
        # Same k-points on both sides: evaluate values+derivatives once and
        # take component 0 (the AO values) for the undifferentiated side.
        ao2_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts, deriv=1)
        ]
        ao1_kpts = ao2_kpts
        ao2_kpts = [ao2_kpt[0] for ao2_kpt in ao2_kpts]
    else:
        ao2_kpts = [
            np.asarray(ao.T, order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)
        ]
        ao1_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band,
                                           deriv=1)
        ]

    if mo_coeff is not None and nset == 1:
        # Work with occupation-weighted occupied orbitals on the grid.
        mo_coeff = [
            mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
            for k, occ in enumerate(mo_occ)
        ]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # max_memory is already net of the memory in use; the original
    # subtracted mem_now a second time here, shrinking the block size.
    blksize = int(
        min(nao, max(1, max_memory * 1e6 / 16 / 4 / 3 / ngrids / nao)))
    lib.logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s blksize %d',
                      max_memory, blksize)
    vR_dm = np.empty((3, nset, nao, ngrids), dtype=vk_kpts.dtype)

    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t1 = (lib.logger.process_clock(), lib.logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            mydf.exxdiv = exxdiv
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, True, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            for p0, p1 in lib.prange(0, nao, blksize):
                # ao1T[1:] holds the derivative components of the AOs.
                rho1 = np.einsum('aig,jg->aijg',
                                 ao1T[1:, p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(3, p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('aijg,jg->aig', vR, ao_dms[i],
                              out=vR_dm[:, i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[:, i, k1] -= weight * np.einsum(
                    'aig,jg->aij', vR_dm[:, i], ao1T[0])
        t1 = lib.logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2,
                                     *t1)

    # Ewald correction has no contribution to nuclear gradient unless range
    # separated Coulomb is used.
    # The gradient correction part is not added in the vk matrix
    if exxdiv == 'ewald' and cell.omega != 0:
        # when cell.omega != 0: madelung constant will have a non-zero
        # derivative
        raise NotImplementedError("Range Separated Coulomb")

    vk_kpts = np.asarray(
        [_format_jks(vk, dm_kpts, input_band, kpts) for vk in vk_kpts])
    return vk_kpts
def get_j_for_bands(self, dm_kpts, hermi=1, kpts=np.zeros((1, 3)),
                    kpts_band=None):
    '''Coulomb (J) matrices at band k-points with compact/diffused splitting.

    AOs with index below ``nao_compact`` are treated as compact, the rest as
    diffused.  When diffused functions are present, the full Coulomb kernel
    is applied to every pair density and the short-range kernel is subtracted
    again for pairs that involve a compact function; otherwise only the
    long-range kernel is used.

    Args:
        self : object providing ``cell``, ``mesh``, ``omega``,
            ``max_memory``, ``weighted_coulG``, ``weighted_coulG_LR`` and
            ``ft_loop``.
        dm_kpts : density matrices on ``kpts``.
        hermi : unused here; kept for interface compatibility.
        kpts : sampled k-points.
        kpts_band : optional band k-points; defaults to ``kpts``.

    Returns:
        J matrices formatted by ``_format_jks``.
    '''
    log = logger.Logger(self.stdout, self.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t1 = (logger.process_clock(), logger.perf_counter())

    cell = self.cell
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    n_dm, nkpts, nao = dms.shape[:3]
    n_diffused = cell._nbas_each_set[2]
    nao_compact = cell.ao_loc[cell.nbas - n_diffused]

    kpt_allow = np.zeros(3)
    mesh = self.mesh
    coulG = self.weighted_coulG(kpt_allow, False, mesh)
    coulG_LR = self.weighted_coulG_LR(kpt_allow, False, mesh)
    coulG_SR = coulG - coulG_LR
    ngrids = len(coulG)
    vG = np.zeros((n_dm, ngrids), dtype=np.complex128)
    vG_SR = np.zeros((n_dm, ngrids), dtype=np.complex128)
    max_memory = (self.max_memory - lib.current_memory()[0]) * .8

    for aoaoks, p0, p1 in self.ft_loop(mesh, kpt_allow, kpts,
                                       max_memory=max_memory):
        #:rho = np.einsum('lkL,lk->L', pqk.conj(), dm)
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1, nao, nao)
            if nao_compact < nao:
                for i in range(n_dm):
                    rho = np.einsum('ij,Lji->L', dms[i, k], aoao.conj())
                    vG[i, p0:p1] += rho * coulG[p0:p1]
                # Zero the diffused-diffused block so that only pairs with a
                # compact function enter the short-range density.
                aoao[:, nao_compact:, nao_compact:] = 0
                for i in range(n_dm):
                    rho = np.einsum('ij,Lji->L', dms[i, k], aoao.conj())
                    vG_SR[i, p0:p1] += rho * coulG_SR[p0:p1]
            else:
                for i in range(n_dm):
                    rho = np.einsum('ij,Lji->L', dms[i, k], aoao.conj())
                    vG[i, p0:p1] += rho * coulG_LR[p0:p1]
        # Release the block before the next one is generated.
        aoao = aoaoks = p0 = p1 = None
    weight = 1. / len(kpts)
    vG *= weight
    vG_SR *= weight
    t1 = log.timer_debug1('get_j pass 1 to compute J(G)', *t1)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    vj_kpts = np.zeros((n_dm, nband, nao, nao), dtype=np.complex128)
    for aoaoks, p0, p1 in self.ft_loop(mesh, kpt_allow, kpts_band,
                                       max_memory=max_memory):
        for k, aoao in enumerate(aoaoks):
            aoao = aoao.reshape(-1, nao, nao)
            if nao_compact < nao:
                for i in range(n_dm):
                    vj_kpts[i, k] += np.einsum('L,Lij->ij', vG[i, p0:p1], aoao)
                aoao[:, nao_compact:, nao_compact:] = 0
                for i in range(n_dm):
                    vj_kpts[i, k] -= np.einsum('L,Lij->ij', vG_SR[i, p0:p1],
                                               aoao)
            else:
                for i in range(n_dm):
                    vj_kpts[i, k] += np.einsum('L,Lij->ij', vG[i, p0:p1], aoao)
        aoao = aoaoks = p0 = p1 = None

    # G=0 contribution, associated to 2e integrals in real-space
    if cell.dimension >= 2:
        ovlp = np.asarray(cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts))
        ovlp[:, nao_compact:, nao_compact:] = 0
        ovlp_b = np.asarray(
            cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts_band))
        ovlp_b[:, nao_compact:, nao_compact:] = 0
        kws = cell.get_Gv_weights(mesh)[2]
        G0_weight = kws[0] if isinstance(kws, np.ndarray) else kws
        # Contract with the 4-index ``dms``: the raw ``dm_kpts`` may lack the
        # leading density-set axis that the 'nkqp' subscript requires.
        vj_G0 = lib.einsum('kpq,nkqp,lrs->nlrs', ovlp, dms, ovlp_b)
        vj_kpts -= np.pi / self.omega**2 * weight * G0_weight * vj_G0

    if gamma_point(kpts_band):
        vj_kpts = vj_kpts.real.copy()
    t1 = log.timer_debug1('get_j pass 2', *t1)
    return _format_jks(vj_kpts, dm_kpts, input_band, kpts)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3),
           kpt_band=None, with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    Args:
        mydf : density-fitting object providing ``cell``, ``gs``,
            ``max_memory``, ``weighted_coulG`` and ``pw_loop``.
        dm : density matrix (or a stack of them) at ``kpt``.
        hermi : forwarded to get_j_kpts/get_k_kpts in the band case only.
        kpt : the k-point the density matrix belongs to.
        kpt_band : optional k-point at which J/K are evaluated.  When it
            differs from ``kpt`` the work is delegated to
            get_j_kpts/get_k_kpts.
        with_j, with_k : select which matrices are built.
        exxdiv : exchange divergence treatment; stored on ``mydf.exxdiv``
            when ``with_k`` is set.

    Returns:
        (vj, vk); an entry is None when it was not requested.
    '''
    from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0
    vj = vk = None
    if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, dm, hermi, kpt, kpt_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, dm, hermi, kpt, kpt_band)
        return vj, vk

    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks so the
    # values are compatible with log.timer_debug1.
    t1 = (logger.process_clock(), logger.perf_counter())
    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)

    kptii = numpy.asarray((kpt,kpt))
    kpt_allow = numpy.zeros(3)

    if with_j:
        vjcoulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs)
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt_allow, True, mydf.gs)
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    t2 = t1

    # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs))
    #                 == conj(transpose(rho_sr(G+k_sr), (0,2,1)))
    blksize = max(int(max_memory*.25e6/16/nao**2), 16)
    # Scratch space receiving the (0,2,1)-transposed pair densities.
    bufR = numpy.empty(blksize*nao**2)
    bufI = numpy.empty(blksize*nao**2)
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, kptii, max_memory=max_memory):
        t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2)
        pqkR = pqkR.reshape(nao,nao,-1)
        pqkI = pqkI.reshape(nao,nao,-1)
        if with_j:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vj += numpy.einsum('ijkl,lk->ij', v4, dm)
            for i in range(nset):
                rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR)
                rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI)
                rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR)
                rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI)
                rhoR *= vjcoulG[p0:p1]
                rhoI *= vjcoulG[p0:p1]
                vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR)
                vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI)
                if not j_real:
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI)
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR)

        if with_k:
            coulG = numpy.sqrt(vkcoulG[p0:p1])
            pqkR *= coulG
            pqkI *= coulG
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = lib.transpose(pqkR, axes=(0,2,1), out=bufR).reshape(-1,nao)
            pLqI = lib.transpose(pqkI, axes=(0,2,1), out=bufI).reshape(-1,nao)
            # Reuse the memory of pqkR/pqkI for the half-contracted tensors.
            iLkR = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkR)
            iLkI = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkI)
            for i in range(nset):
                if k_real:
                    lib.dot(pLqR, dmsR[i], 1, iLkR)
                    lib.dot(pLqI, dmsR[i], 1, iLkI)
                    lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1)
                    lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1)
                else:
                    zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR, iLkI)
                    # Trailing 1 (beta): accumulate onto vkR/vkI so that the
                    # contributions of earlier plane-wave blocks are kept,
                    # exactly like the real-valued branch above.
                    zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                           pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                           1, vkR[i], vkI[i], 1)
        # Drop references to the block-sized arrays before the next block.
        pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None
    bufR = bufI = None
    t1 = log.timer_debug1('aft_jk.get_jk', *t1)

    if with_j:
        if j_real:
            vj = vjR
        else:
            vj = vjR + vjI * 1j
        vj = vj.reshape(dm.shape)
    if with_k:
        if k_real:
            vk = vkR
        else:
            vk = vkR + vkI * 1j
        if cell.dimension != 3 and exxdiv:
            assert(exxdiv.lower() == 'ewald')
            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
        vk = vk.reshape(dm.shape)
    return vj, vk
def get_k_kpts(self, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None,
               exxdiv=None):
    '''Exchange (K) matrices with compact/diffused basis splitting.

    C ~ compact basis, D ~ diffused basis

    Compute K matrix with coulG_LR:
    (CC|CC) (CC|CD) (CC|DC) (CD|CC) (CD|CD) (CD|DC) (DC|CC) (DC|CD) (DC|DC)

    Compute K matrix with full coulG:
    (CC|DD) (CD|DD) (DC|DD) (DD|CC) (DD|CD) (DD|DC) (DD|DD)

    Args:
        self : object providing ``cell``, ``mesh``, ``omega``,
            ``max_memory``, ``weighted_coulG``, ``weighted_coulG_SR`` and
            ``ft_loop``.
        dm_kpts : density matrices on ``kpts``.
        hermi : unused here; kept for interface compatibility.
        kpts : sampled k-points.
        kpts_band : optional band k-points; defaults to ``kpts``.
        exxdiv : exchange divergence treatment passed to ``weighted_coulG``.

    Returns:
        K matrices formatted by ``_format_jks``.
    '''
    cell = self.cell
    log = logger.Logger(self.stdout, self.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t1 = (logger.process_clock(), logger.perf_counter())

    mesh = self.mesh
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    # The (ki,kj) <-> (kj,ki) permutation can only be exploited when bands
    # and sampled k-points are the same set.
    swap_2e = (kpts_band is None)
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1, 1, 3) - kpts.reshape(1, -1, 3)
    kk_todo = np.ones(kk_table.shape[:2], dtype=bool)
    vkR = np.zeros((nset, nband, nao, nao))
    vkI = np.zeros((nset, nband, nao, nao))
    dmsR = np.asarray(dms.real, order='C')
    dmsI = np.asarray(dms.imag, order='C')
    weight = 1. / nkpts

    n_diffused = cell._nbas_each_set[2]
    nao_compact = cell.ao_loc[cell.nbas - n_diffused]

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (self.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = np.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = np.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx, kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx, kpti_idx] = False

        max_memory1 = max_memory * (nkptj + 1) / (nkptj + 5)

        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        vkcoulG = self.weighted_coulG(kpt, exxdiv, mesh)
        coulG_SR = self.weighted_coulG_SR(kpt, False, mesh)
        coulG_LR = vkcoulG - coulG_SR
        kptjs = kpts[kptj_idx]
        perm_sym = swap_2e and not is_zero(kpt)
        for aoaoks, p0, p1 in self.ft_loop(mesh, kpt, kptjs,
                                           max_memory=max_memory1):
            if nao_compact < nao:
                aoaoks = [aoao.reshape(-1, nao, nao) for aoao in aoaoks]
                # Full kernel for every pair ...
                aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI),
                                   vkcoulG[p0:p1], weight, kpti_idx, kptj_idx,
                                   perm_sym)
                # ... then remove the short-range part for pairs that
                # involve a compact function (diffused-diffused zeroed).
                for aoao in aoaoks:
                    aoao[:, nao_compact:, nao_compact:] = 0
                aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI),
                                   coulG_SR[p0:p1], -weight, kpti_idx,
                                   kptj_idx, perm_sym)
            else:
                aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI),
                                   coulG_LR[p0:p1], weight, kpti_idx, kptj_idx,
                                   perm_sym)

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki, kj]:
                make_kpt(kptj - kpti)
        t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)' % ki, *t1)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
            not np.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j

    # G=0 associated to 2e integrals in real-space
    if cell.dimension >= 2:
        ovlp = np.asarray(cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts))
        ovlp[:, nao_compact:, nao_compact:] = 0
        kws = cell.get_Gv_weights(mesh)[2]
        G0_weight = kws[0] if isinstance(kws, np.ndarray) else kws
        # Contract with the 4-index ``dms``: the raw ``dm_kpts`` may lack the
        # leading density-set axis that the 'nkqr' subscript requires.
        vk_G0 = lib.einsum('kpq,nkqr,krs->nkps', ovlp, dms, ovlp)
        vk_kpts -= np.pi / self.omega**2 * weight * G0_weight * vk_G0

    # Add ewald_exxdiv contribution because G=0 was not included in the
    # non-uniform grids
    if (exxdiv == 'ewald' and
        (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
         (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
        # ``dms`` is laid out on the sampled ``kpts`` (see nkpts above), so
        # ``kpts`` is passed as the k-point list and ``kpts_band`` as the
        # band list.  Identical to the previous call when kpts_band is None.
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Exchange (K) matrices from plane-wave pair densities.

    Args:
        mydf : object providing ``cell``, ``gs``, ``max_memory``,
            ``weighted_coulG`` and ``ft_loop``.
        dm_kpts : density matrices on ``kpts``.
        hermi : unused here; kept for interface compatibility.
        kpts : sampled k-points.
        kpts_band : optional band k-points; defaults to ``kpts``.
        exxdiv : exchange divergence treatment; stored on ``mydf.exxdiv``.

    Returns:
        K matrices formatted by ``_format_jks``.
    '''
    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # NOTE: the original created an unused timer tuple via time.clock(),
    # which no longer exists on Python >= 3.8; it has been removed.
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    # The (ki,kj) <-> (kj,ki) permutation can only be exploited when bands
    # and sampled k-points are the same set.
    swap_2e = (kpts_band is None)
    kpts_band, single_kpt_band = _format_kpts_band(kpts_band, kpts)
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)
        blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
        # Scratch space receiving the (0,2,1)-transposed pair densities.
        bufR = numpy.empty((blksize*nao**2))
        bufI = numpy.empty((blksize*nao**2))
        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        for k, pqkR, pqkI, p0, p1 \
                in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
            ki = kpti_idx[k]
            kj = kptj_idx[k]
            coulG = numpy.sqrt(vkcoulG[p0:p1])

            # case 1: k_pq = (pi|iq)
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pqkR *= coulG
            pqkI *= coulG
            pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR)
            pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI)
            iLkR = numpy.empty((nao*(p1-p0),nao))
            iLkI = numpy.empty((nao*(p1-p0),nao))
            for i in range(nset):
                iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                                    dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI)
                zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                       pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                       1, vkR[i,ki], vkI[i,ki], 1)

            # case 2: k_pq = (iq|pi)
            #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                iLkR = iLkR.reshape(nao,-1)
                iLkI = iLkI.reshape(nao,-1)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                        pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, iLkR, iLkI)
                    zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                           1, vkR[i,kj], vkI[i,kj], 1)
            # Drop references to the block-sized arrays before the next block.
            pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
            not numpy.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j
    vk_kpts *= 1./nkpts

    # G=0 was not included in the non-uniform grids
    if cell.dimension != 3 and exxdiv:
        assert(exxdiv.lower() == 'ewald')
        # ``dms`` is laid out on the sampled ``kpts`` (see nkpts above), so
        # ``kpts`` is passed as the k-point list and ``kpts_band`` as the
        # band list.  Identical to the previous call when kpts_band is None.
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band)

    return _format_jks(vk_kpts, dm_kpts, kpts_band, kpts, single_kpt_band)
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None):
    '''Coulomb (J) matrices from the auxiliary-basis tensors of ``sr_loop``.

    Pass 1 contracts the density matrices on ``kpts`` with the two 3-index
    tensors yielded by ``mydf.sr_loop`` to obtain the coefficient vectors
    ``rho`` and ``jaux``; pass 2 contracts those vectors with the tensors at
    the band k-points.  The result of ``pwdf_jk.get_j_kpts`` is added on top.

    Args:
        mydf : object providing ``_cderi``, ``build``, ``auxcell``,
            ``max_memory`` and ``sr_loop``.
        dm_kpts : density matrices on ``kpts``.
        hermi : forwarded to ``pwdf_jk.get_j_kpts``.
        kpts : sampled k-points the density matrices belong to.
        kpt_band : optional k-point(s) at which J is evaluated; defaults
            to ``kpts``.

    Returns:
        J matrices in the shape returned by ``pwdf_jk.get_j_kpts``.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks.
    t1 = (logger.process_clock(), logger.perf_counter())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_j_kpts', *t1)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    if kpt_band is None:
        kpts_band = kpts
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
    nband = len(kpts_band)
    j_real = gamma_point(kpts_band)

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .9
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    naux = mydf.auxcell.nao_nr()
    # Transposed so that the flattened index matches 'Lpq,xqp->xL'.
    dmsR = dms.real.transpose(0,1,3,2).reshape(nset,nkpts,nao**2)
    dmsI = dms.imag.transpose(0,1,3,2).reshape(nset,nkpts,nao**2)
    rhoR = numpy.zeros((nset,naux))
    rhoI = numpy.zeros((nset,naux))
    jauxR = numpy.zeros((nset,naux))
    jauxI = numpy.zeros((nset,naux))
    # The density matrices are indexed by the sampled k-points, so pass 1
    # must run over ``kpts``.  The original iterated ``kpts_band`` here,
    # which pairs dms[:,k] with the wrong k-point (or runs out of range)
    # whenever kpt_band is given.
    for k, kpt in enumerate(kpts):
        kptii = numpy.asarray((kpt,kpt))
        p1 = 0
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, False):
            p0, p1 = p1, p1+LpqR.shape[0]
            #:Lpq = LpqR + LpqI*1j
            #:j3c = j3cR + j3cI*1j
            #:rho [:,p0:p1] += numpy.einsum('Lpq,xqp->xL', Lpq, dms[:,k])
            #:jaux[:,p0:p1] += numpy.einsum('Lpq,xqp->xL', j3c, dms[:,k])
            rhoR [:,p0:p1]+= numpy.einsum('Lp,xp->xL', LpqR, dmsR[:,k])
            rhoR [:,p0:p1]-= numpy.einsum('Lp,xp->xL', LpqI, dmsI[:,k])
            rhoI [:,p0:p1]+= numpy.einsum('Lp,xp->xL', LpqR, dmsI[:,k])
            rhoI [:,p0:p1]+= numpy.einsum('Lp,xp->xL', LpqI, dmsR[:,k])
            jauxR[:,p0:p1]+= numpy.einsum('Lp,xp->xL', j3cR, dmsR[:,k])
            jauxR[:,p0:p1]-= numpy.einsum('Lp,xp->xL', j3cI, dmsI[:,k])
            jauxI[:,p0:p1]+= numpy.einsum('Lp,xp->xL', j3cR, dmsI[:,k])
            jauxI[:,p0:p1]+= numpy.einsum('Lp,xp->xL', j3cI, dmsR[:,k])
            LpqR = LpqI = j3cR = j3cI = None

    weight = 1./nkpts
    jauxR *= weight
    jauxI *= weight
    rhoR *= weight
    rhoI *= weight
    vjR = numpy.zeros((nset,nband,nao,nao))
    vjI = numpy.zeros((nset,nband,nao,nao))
    for k, kpt in enumerate(kpts_band):
        kptii = numpy.asarray((kpt,kpt))
        p1 = 0
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, True):
            p0, p1 = p1, p1+LpqR.shape[0]
            #:v = numpy.dot(jaux, Lpq) + numpy.dot(rho, j3c)
            #:vj_kpts[:,k] += lib.unpack_tril(v)
            v  = numpy.dot(jauxR[:,p0:p1], LpqR)
            v -= numpy.dot(jauxI[:,p0:p1], LpqI)
            v += numpy.dot(rhoR [:,p0:p1], j3cR)
            v -= numpy.dot(rhoI [:,p0:p1], j3cI)
            vjR[:,k] += lib.unpack_tril(v)
            if not j_real:
                v  = numpy.dot(jauxR[:,p0:p1], LpqI)
                v += numpy.dot(jauxI[:,p0:p1], LpqR)
                v += numpy.dot(rhoR [:,p0:p1], j3cI)
                v += numpy.dot(rhoI [:,p0:p1], j3cR)
                vjI[:,k] += lib.unpack_tril(v, lib.ANTIHERMI)
            LpqR = LpqI = j3cR = j3cI = None
    t1 = log.timer_debug1('get_j pass 2', *t1)

    vj_kpts = pwdf_jk.get_j_kpts(mydf, dm_kpts, hermi, kpts, kpt_band)
    if j_real:
        vj_kpts += vjR.reshape(vj_kpts.shape)
    else:
        vj_kpts += (vjR+vjI*1j).reshape(vj_kpts.shape)
    return vj_kpts
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3),
           kpts_band=None, with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    Args:
        mydf : density-fitting object providing ``cell``, ``mesh``,
            ``max_memory``, ``weighted_coulG`` and ``pw_loop``.
        dm : density matrix (or a stack of them) at ``kpt``.
        hermi : forwarded to get_j_kpts/get_k_kpts in the band case only.
        kpt : the k-point the density matrix belongs to.
        kpts_band : optional k-point at which J/K are evaluated.  When it
            differs from ``kpt`` the work is delegated to
            get_j_kpts/get_k_kpts.
        with_j, with_k : select which matrices are built.
        exxdiv : exchange divergence treatment; stored on ``mydf.exxdiv``
            when ``with_k`` is set.

    Returns:
        (vj, vk); an entry is None when it was not requested.
    '''
    vj = vk = None
    if kpts_band is not None and abs(kpt-kpts_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, dm, hermi, kpt, kpts_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, dm, hermi, kpt, kpts_band)
        return vj, vk

    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clocks so the
    # values are compatible with log.timer_debug1.
    t1 = (logger.process_clock(), logger.perf_counter())
    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)

    mesh = mydf.mesh
    kptii = numpy.asarray((kpt,kpt))
    kpt_allow = numpy.zeros(3)

    if with_j:
        vjcoulG = mydf.weighted_coulG(kpt_allow, False, mesh)
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt_allow, True, mesh)
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    t2 = t1

    # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs))
    #                 == conj(transpose(rho_sr(G+k_sr), (0,2,1)))
    pLqR = pLqI = None
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptii, max_memory=max_memory):
        t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2)
        pqkR = pqkR.reshape(nao,nao,-1)
        pqkI = pqkI.reshape(nao,nao,-1)
        if with_j:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vj += numpy.einsum('ijkl,lk->ij', v4, dm)
            for i in range(nset):
                rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR)
                rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI)
                rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR)
                rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI)
                rhoR *= vjcoulG[p0:p1]
                rhoI *= vjcoulG[p0:p1]
                vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR)
                vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI)
                if not j_real:
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI)
                    vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR)

        if with_k:
            #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
            #:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pLqR = lib.transpose(pqkR, axes=(0,2,1), out=pLqR).reshape(-1,nao)
            pLqI = lib.transpose(pqkI, axes=(0,2,1), out=pLqI).reshape(-1,nao)
            nG = p1 - p0
            # Reuse the memory of pqkR/pqkI for the half-contracted tensors.
            iLkR = numpy.ndarray((nao,nG,nao), buffer=pqkR)
            iLkI = numpy.ndarray((nao,nG,nao), buffer=pqkI)
            for i in range(nset):
                if k_real:
                    lib.dot(pLqR, dmsR[i], 1, iLkR.reshape(nao*nG,nao))
                    lib.dot(pLqI, dmsR[i], 1, iLkI.reshape(nao*nG,nao))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1)
                    lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1)
                else:
                    zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao))
                    iLkR *= vkcoulG[p0:p1].reshape(1,nG,1)
                    iLkI *= vkcoulG[p0:p1].reshape(1,nG,1)
                    # Trailing 1 (beta): accumulate onto vkR/vkI so that the
                    # contributions of earlier plane-wave blocks are kept,
                    # exactly like the real-valued branch above.
                    zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                           pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                           1, vkR[i], vkI[i], 1)
        # Drop references to the block-sized arrays before the next block.
        pqkR = pqkI = pLqR = pLqI = iLkR = iLkI = None
    t1 = log.timer_debug1('aft_jk.get_jk', *t1)

    if with_j:
        if j_real:
            vj = vjR
        else:
            vj = vjR + vjI * 1j
        vj = vj.reshape(dm.shape)
    if with_k:
        if k_real:
            vk = vkR
        else:
            vk = vkR + vkI * 1j
        # Add ewald_exxdiv contribution because G=0 was not included in the
        # non-uniform grids
        if (exxdiv == 'ewald' and
            (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
             (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
            _ewald_exxdiv_for_G0(cell, kpt, dms, vk)
        vk = vk.reshape(dm.shape)
    return vj, vk
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Exchange (K) matrices on a uniform FFT grid, distributed over MPI.

    The (band k-point, sampled k-point) pairs are split among processes with
    ``mpi.static_partition`` and the partial results are combined with
    ``mpi.reduce``.

    Args:
        mydf : FFT density-fitting object (synchronised via ``_sync_mydf``)
            providing ``cell``, ``mesh``, ``grids``, ``_numint`` and
            ``max_memory``.
        dm_kpts : density matrices on ``kpts``.  If the object (or each of
            its entries) carries ``mo_coeff``/``mo_occ`` attributes, the
            occupied orbitals are used to build the pair densities.
        hermi : unused here; kept for interface compatibility.
        kpts : sampled k-points.
        kpts_band : optional band k-points; defaults to ``kpts``.
        exxdiv : exchange divergence treatment; stored on ``mydf.exxdiv``.

    Returns:
        On rank 0, K matrices formatted by ``_format_jks``; other ranks
        return None.
    '''
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if hasattr(dm_kpts, 'mo_coeff'):
        if dm_kpts.ndim == 3:  # KRHF
            mo_coeff = [dm_kpts.mo_coeff]
            mo_occ   = [dm_kpts.mo_occ  ]
        else:  # KUHF
            mo_coeff = dm_kpts.mo_coeff
            mo_occ   = dm_kpts.mo_occ
    elif hasattr(dm_kpts[0], 'mo_coeff'):
        mo_coeff = [dm.mo_coeff for dm in dm_kpts]
        mo_occ   = [dm.mo_occ   for dm in dm_kpts]
    else:
        mo_coeff = None

    kpts = numpy.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1./nkpts * (cell.vol/ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=dms.dtype)
    else:
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128)

    coords = mydf.grids.coords
    ao2_kpts = [numpy.asarray(ao.T, order='C')
                for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [numpy.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # max_memory is already net of the memory in use; the original
    # subtracted mem_now a second time here, shrinking the block size.
    blksize = int(min(nao, max(1, max_memory*1e6/16/4/ngrids/nao)))
    lib.logger.debug1(mydf, 'max_memory %s blksize %d', max_memory, blksize)
    vR_dm = numpy.empty((nset,nao,ngrids), dtype=vk_kpts.dtype)

    # Per-k2 cache of the density-contracted orbitals on the grid.
    ao_dms_buf = [None] * nkpts
    tasks = [(k1,k2) for k2 in range(nkpts) for k1 in range(nband)]
    for k1, k2 in mpi.static_partition(tasks):
        ao1T = ao1_kpts[k1]
        ao2T = ao2_kpts[k2]
        kpt1 = kpts_band[k1]
        kpt2 = kpts[k2]
        if ao2T.size == 0 or ao1T.size == 0:
            continue

        # If we have an ewald exxdiv, we add the G=0 correction near the
        # end of the function to bypass any discretization errors
        # that arise from the FFT.
        mydf.exxdiv = exxdiv
        if exxdiv == 'ewald' or exxdiv is None:
            coulG = tools.get_coulG(cell, kpt2-kpt1, False, mydf, mesh)
        else:
            coulG = tools.get_coulG(cell, kpt2-kpt1, True, mydf, mesh)
        if is_zero(kpt1-kpt2):
            expmikr = numpy.array(1.)
        else:
            expmikr = numpy.exp(-1j * numpy.dot(coords, kpt2-kpt1))

        if ao_dms_buf[k2] is None:
            if mo_coeff is None:
                ao_dms = [lib.dot(dm[k2], ao2T.conj()) for dm in dms]
            else:
                ao_dms = []
                for i, dm in enumerate(dms):
                    occ = mo_occ[i][k2]
                    mo_scaled = mo_coeff[i][k2][:,occ>0] * numpy.sqrt(occ[occ>0])
                    ao_dms.append(lib.dot(mo_scaled.T, ao2T).conj())
            ao_dms_buf[k2] = ao_dms
        else:
            ao_dms = ao_dms_buf[k2]

        if mo_coeff is None:
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = numpy.einsum('ig,jg->ijg', ao1T[p0:p1].conj()*expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1-p0,nao,ngrids)
                vG = None
                if vR_dm.dtype == numpy.double:
                    vR = vR.real
                for i in range(nset):
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                vR = None
        else:
            for p0, p1 in lib.prange(0, nao, blksize):
                for i in range(nset):
                    rho1 = numpy.einsum('ig,jg->ijg',
                                        ao1T[p0:p1].conj()*expmikr,
                                        ao_dms[i].conj())
                    vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                    rho1 = None
                    vG *= coulG
                    vR = tools.ifft(vG, mesh).reshape(p1-p0,-1,ngrids)
                    vG = None
                    if vR_dm.dtype == numpy.double:
                        vR = vR.real
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                    vR = None
        vR_dm *= expmikr.conj()

        for i in range(nset):
            vk_kpts[i,k1] += weight * lib.dot(vR_dm[i], ao1T.T)

    vk_kpts = mpi.reduce(lib.asarray(vk_kpts))
    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = vk_kpts.real

    if rank == 0:
        if exxdiv == 'ewald':
            _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)
        return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None,
               exxdiv=None):
    '''Build the K (exchange) matrices on ``kpts_band`` via ``mydf.ft_loop``.

    Pairs (ki in kpts_band, kj in kpts) are grouped by their difference
    ``kptj - kpti``; each group is processed once with the Coulomb kernel
    ``mydf.weighted_coulG(kpt, exxdiv, mesh)`` and accumulated into real and
    imaginary parts of K by ``_update_vk_``.

    Args:
        mydf : density-fitting object; ``cell``, ``mesh``, ``stdout``,
            ``verbose``, ``max_memory``, ``weighted_coulG`` and ``ft_loop``
            are used.
        dm_kpts : density matrices on ``kpts``.
        hermi : not used by this function.
        kpts : k-points the density matrices are defined on.
        kpts_band : k-points on which K is evaluated.  When ``None`` the
            pair (kj, ki) is marked done together with (ki, kj) for non-zero
            ``kpt`` and ``perm_sym`` is passed as true to ``_update_vk_``.
        exxdiv : forwarded to ``mydf.weighted_coulG``; for ``'ewald'`` in
            0D/1D cells, or 2D cells with ``low_dim_ft_type ==
            'inf_vacuum'``, the G=0 term is added at the end.

    Returns:
        The result of ``_format_jks(vk_kpts, dm_kpts, input_band, kpts)``.
    '''
    cell = mydf.cell
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = (logger.process_clock(), logger.perf_counter())

    mesh = mydf.mesh
    # kpts is reshaped and fancy-indexed below, so it must be an ndarray.
    kpts = numpy.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    swap_2e = (kpts_band is None)
    # Keep the caller's original kpts_band for the final formatting step.
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)
    # kk_table[i,j] = kpts_band[i] - kpts[j]; kk_todo marks unprocessed pairs.
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)
    weight = 1./len(kpts)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        perm_sym = swap_2e and not is_zero(kpt)
        if perm_sym:
            # The transposed pairs are handled in the same pass.
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)

        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh)
        kptjs = kpts[kptj_idx]
        for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt, kptjs,
                                           max_memory=max_memory1):
            _update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI), vkcoulG[p0:p1],
                        weight, kpti_idx, kptj_idx, perm_sym)

    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)
        t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)'%ki, *t1)

    if (gamma_point(kpts) and gamma_point(kpts_band) and
            not numpy.iscomplexobj(dm_kpts)):
        vk_kpts = vkR
    else:
        vk_kpts = vkR + vkI * 1j

    # Add ewald_exxdiv contribution because G=0 was not included in the
    # non-uniform grids
    if (exxdiv == 'ewald' and
        (cell.dimension < 2 or  # 0D and 1D are computed with inf_vacuum
         (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))):
        # dms is indexed along kpts (see _format_dms above), so kpts -- not
        # kpts_band -- is the k-point list that pairs with it.
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)