def add_wvvVV_(self, t1, t2, eris, t2new_tril):
    """Accumulate the tau * vvvv contraction into ``t2new_tril``.

    Reference expressions (full-tensor form, as in the inline notes)::

        tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)

    Only occupied pairs with ``j <= i`` are built; they are packed along the
    leading axis of ``tau`` (``nocc*(nocc+1)//2`` entries).

    Args:
        t1: array with ``t1.shape == (nocc, nvir)``.
        t2: array sliced as ``t2[i, :i+1]``.
        eris: object exposing ``vvvv``; rows ``p0:p0+a+1`` are expanded with
            ``_ccsd.unpack_tril`` (presumably a packed lower-triangular
            layout -- confirm against the integral storage).
        t2new_tril: output array, passed to ``_dgemm`` reshaped to
            ``(-1, nvir*nvir)``.

    Returns:
        ``t2new_tril``.
    """
    time0 = logger.process_clock(), logger.perf_counter()
    nocc, nvir = t1.shape
    npair = nocc * (nocc + 1) // 2

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    tau = numpy.empty((npair, nvir, nvir))
    p0 = 0
    for i in range(nocc):
        tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
        tau[p0:p0+i+1] += t2[i, :i+1]
        p0 += i + 1
    time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

    p0 = 0
    outbuf = numpy.empty((nvir, nvir, nvir))
    for a in range(nvir):
        buf = _ccsd.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf[:a+1])
        #: t2new_tril[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,:a+1], buf)
        lib.numpy_helper._dgemm('N', 'N', npair, nvir, (a+1)*nvir,
                                tau.reshape(-1, nvir*nvir),
                                buf.reshape(-1, nvir),
                                t2new_tril.reshape(-1, nvir*nvir),
                                1, 1, 0, 0, a*nvir)

        #: t2new_tril[i,:i+1,:a] += numpy.einsum('xd,abd->xab', tau[:,a], buf[:a])
        if a > 0:
            lib.numpy_helper._dgemm('N', 'T', npair, a*nvir, nvir,
                                    tau.reshape(-1, nvir*nvir),
                                    buf.reshape(-1, nvir),
                                    t2new_tril.reshape(-1, nvir*nvir),
                                    1, 1, a*nvir, 0, 0)
        p0 += a + 1
        time0 = logger.timer_debug1(self, 'vvvv %d' % a, *time0)
    return t2new_tril
def add_wvvVV_(self, t1, t2, eris, t2new_tril, max_memory=2000):
    """Accumulate the tau * vvvv contraction into ``t2new_tril``.

    Reference expressions (full-tensor form, as in the inline notes)::

        tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)

    Only occupied pairs with ``j <= i`` are built; they are packed along the
    leading axis of ``tau`` (``nocc*(nocc+1)//2`` entries).

    Args:
        t1: array with ``t1.shape == (nocc, nvir)``.
        t2: array sliced as ``t2[i, :i+1]``.
        eris: object exposing ``vvvv``; rows ``p0:p0+a+1`` are expanded with
            ``_ccsd.unpack_tril`` (presumably a packed lower-triangular
            layout -- confirm against the integral storage).
        t2new_tril: output array, passed to ``_dgemm`` reshaped to
            ``(-1, nvir*nvir)``.
        max_memory: accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        ``t2new_tril``.
    """
    time0 = logger.process_clock(), logger.perf_counter()
    nocc, nvir = t1.shape
    npair = nocc * (nocc + 1) // 2

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    tau = numpy.empty((npair, nvir, nvir))
    p0 = 0
    for i in range(nocc):
        tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
        tau[p0:p0+i+1] += t2[i, :i+1]
        p0 += i + 1
    time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

    p0 = 0
    outbuf = numpy.empty((nvir, nvir, nvir))
    for a in range(nvir):
        buf = _ccsd.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf[:a+1])
        #: t2new_tril[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,:a+1], buf)
        lib.numpy_helper._dgemm('N', 'N', npair, nvir, (a+1)*nvir,
                                tau.reshape(-1, nvir*nvir),
                                buf.reshape(-1, nvir),
                                t2new_tril.reshape(-1, nvir*nvir),
                                1, 1, 0, 0, a*nvir)

        #: t2new_tril[i,:i+1,:a] += numpy.einsum('xd,abd->xab', tau[:,a], buf[:a])
        if a > 0:
            lib.numpy_helper._dgemm('N', 'T', npair, a*nvir, nvir,
                                    tau.reshape(-1, nvir*nvir),
                                    buf.reshape(-1, nvir),
                                    t2new_tril.reshape(-1, nvir*nvir),
                                    1, 1, a*nvir, 0, 0)
        p0 += a + 1
        time0 = logger.timer_debug1(self, 'vvvv %d' % a, *time0)
    return t2new_tril
def _make_shared(self):
    """Build the intermediates needed by both the IP and EA code paths.

    Stores ``Foo``, ``Fvv``, ``Fov``, ``Wovvo`` (from ``imd``) and ``Woovv``
    (taken directly from ``eris.oovv``) on ``self`` and sets
    ``self._made_shared``.

    Returns:
        ``self``.
    """
    cput0 = (logger.process_clock(), logger.perf_counter())

    t1, t2, eris = self.t1, self.t2, self.eris
    self.Foo = imd.Foo(t1, t2, eris)
    self.Fvv = imd.Fvv(t1, t2, eris)
    self.Fov = imd.Fov(t1, t2, eris)

    # 2 virtuals
    self.Wovvo = imd.Wovvo(t1, t2, eris)
    self.Woovv = eris.oovv

    self._made_shared = True
    logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *cput0)
    return self
def _make_shared(self):
    """Compute the intermediates common to the IP and EA code paths.

    ``Foo``, ``Fvv``, ``Fov`` and ``Wovvo`` are obtained from the functions
    of the same name in ``imd``; ``Woovv`` is read from ``eris.oovv``.
    ``self._made_shared`` is set once they are all stored.

    Returns:
        ``self``.
    """
    started = (logger.process_clock(), logger.perf_counter())
    t1, t2, eris = self.t1, self.t2, self.eris

    # The last entry, Wovvo, is the "2 virtuals" intermediate.
    for label in ('Foo', 'Fvv', 'Fov', 'Wovvo'):
        setattr(self, label, getattr(imd, label)(t1, t2, eris))
    self.Woovv = eris.oovv

    self._made_shared = True
    logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *started)
    return self
def make_t3p2_ea(self, cc):
    """Rebuild the EA intermediates from the ``get_t3p2_imds_slow`` output.

    The amplitudes returned by ``imd.get_t3p2_imds_slow`` replace
    ``self.t1`` / ``self.t2``, the EA intermediates are regenerated from
    them, and the returned ``Wvvvo`` term is added on top of ``self.Wvvvo``.

    Args:
        cc: object providing ``t1`` and ``t2``; it is also forwarded to
            ``imd.get_t3p2_imds_slow``.

    Returns:
        ``self``.
    """
    started = (logger.process_clock(), logger.perf_counter())

    t3p2_imds = imd.get_t3p2_imds_slow(cc, cc.t1, cc.t2, self.eris)
    _e_corr, new_t1, new_t2, _w_ovoo, w_vvvo = t3p2_imds
    self.t1 = new_t1
    self.t2 = new_t2

    # Force the shared intermediates to be rebuilt, and only call make_ea()
    # once t1/t2 have been replaced.
    self._made_shared = False
    self.make_ea()
    self.Wvvvo = self.Wvvvo + w_vvvo
    self.made_ea_imds = True

    logger.timer_debug1(self, 'EOM-CCSD(T)a EA intermediates', *started)
    return self
def make_ea(self):
    """Build the EA intermediates (``Wvovv``, ``Wvvvv``, ``Wvvvo``).

    The shared intermediates are built first if ``self._made_shared`` is
    false. ``Wvvvo`` is computed from the freshly stored ``Wvvvv``.
    Sets ``self.made_ea_imds``.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    cput0 = (logger.process_clock(), logger.perf_counter())

    t1, t2, eris = self.t1, self.t2, self.eris

    # 3 or 4 virtuals
    self.Wvovv = imd.Wvovv(t1, t2, eris)
    self.Wvvvv = imd.Wvvvv(t1, t2, eris)
    self.Wvvvo = imd.Wvvvo(t1, t2, eris, self.Wvvvv)

    self.made_ea_imds = True
    logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *cput0)
    return self
def make_ip(self):
    """Compute the IP intermediates ``Woooo``, ``Wooov`` and ``Wovoo``.

    Triggers ``_make_shared()`` first when the shared intermediates are not
    available yet, and sets ``self.made_ip_imds`` when done.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    started = (logger.process_clock(), logger.perf_counter())
    t1, t2, eris = self.t1, self.t2, self.eris

    # 0 or 1 virtuals
    for label in ('Woooo', 'Wooov', 'Wovoo'):
        setattr(self, label, getattr(imd, label)(t1, t2, eris))

    self.made_ip_imds = True
    logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *started)
    return self
def make_t3p2_ip(self, cc):
    """Rebuild the IP intermediates from the ``get_t3p2_imds_slow`` output.

    The amplitudes returned by ``imd.get_t3p2_imds_slow`` replace
    ``self.t1`` / ``self.t2``, the IP intermediates are regenerated from
    them, and the returned ``Wovoo`` term is added on top of ``self.Wovoo``.

    Args:
        cc: object providing ``t1`` and ``t2``; it is also forwarded to
            ``imd.get_t3p2_imds_slow``.

    Returns:
        ``self``.
    """
    cput0 = (logger.process_clock(), logger.perf_counter())

    t1, t2, eris = cc.t1, cc.t2, self.eris
    delta_E_corr, pt1, pt2, Wovoo, Wvvvo = \
        imd.get_t3p2_imds_slow(cc, t1, t2, eris)
    self.t1 = pt1
    self.t2 = pt2

    self._made_shared = False  # Force update
    self.make_ip()  # Make after t1/t2 updated
    self.Wovoo = self.Wovoo + Wovoo

    self.made_ip_imds = True
    logger.timer_debug1(self, 'EOM-CCSD(T)a IP intermediates', *cput0)
    return self
def make_ip(self):
    """Build the IP intermediates (``Woooo``, ``Wooov``, ``Wovoo``).

    The shared intermediates are built first if ``self._made_shared`` is
    false. Sets ``self.made_ip_imds``.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    cput0 = (logger.process_clock(), logger.perf_counter())

    t1, t2, eris = self.t1, self.t2, self.eris

    # 0 or 1 virtuals
    self.Woooo = imd.Woooo(t1, t2, eris)
    self.Wooov = imd.Wooov(t1, t2, eris)
    self.Wovoo = imd.Wovoo(t1, t2, eris)

    self.made_ip_imds = True
    logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *cput0)
    return self
def make_ea(self):
    """Compute the EA intermediates ``Wvovv``, ``Wvvvv`` and ``Wvvvo``.

    Triggers ``_make_shared()`` first when the shared intermediates are not
    available yet. ``Wvvvo`` takes the just-stored ``Wvvvv`` as an extra
    argument, so it is evaluated last. Sets ``self.made_ea_imds`` when done.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    started = (logger.process_clock(), logger.perf_counter())
    t1, t2, eris = self.t1, self.t2, self.eris

    # 3 or 4 virtuals
    for label in ('Wvovv', 'Wvvvv'):
        setattr(self, label, getattr(imd, label)(t1, t2, eris))
    self.Wvvvo = imd.Wvvvo(t1, t2, eris, self.Wvvvv)

    self.made_ea_imds = True
    logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *started)
    return self
def make_ip(self):
    """Build the k-point IP intermediates (``Woooo``, ``Wooov``, ``Wovoo``).

    Each intermediate is obtained from the ``imd`` function of the same
    name, called with ``self._cc``, the amplitudes, ``self.eris`` and
    ``self.kconserv``. The shared intermediates are built first if
    ``self._made_shared`` is false. Sets ``self.made_ip_imds``.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    cput0 = (logger.process_clock(), logger.perf_counter())

    kconserv = self.kconserv
    t1, t2, eris = self.t1, self.t2, self.eris

    # 0 or 1 virtuals
    self.Woooo = imd.Woooo(self._cc, t1, t2, eris, kconserv)
    self.Wooov = imd.Wooov(self._cc, t1, t2, eris, kconserv)
    self.Wovoo = imd.Wovoo(self._cc, t1, t2, eris, kconserv)

    self.made_ip_imds = True
    logger.timer_debug1(self, 'EOM-CCSD IP intermediates', *cput0)
    return self
def _make_shared(self):
    """Build the k-point intermediates shared by the IP and EA code paths.

    Stores ``Foo``, ``Fvv``, ``Fov``, ``Wovvo`` (from ``imd``, called with
    ``self._cc`` and ``self.kconserv``) and ``Woovv`` (taken directly from
    ``eris.oovv``) on ``self`` and sets ``self._made_shared``.

    Returns:
        ``self``.
    """
    cput0 = (logger.process_clock(), logger.perf_counter())

    kconserv = self.kconserv
    t1, t2, eris = self.t1, self.t2, self.eris
    self.Foo = imd.Foo(self._cc, t1, t2, eris, kconserv)
    self.Fvv = imd.Fvv(self._cc, t1, t2, eris, kconserv)
    self.Fov = imd.Fov(self._cc, t1, t2, eris, kconserv)

    # 2 virtuals
    self.Wovvo = imd.Wovvo(self._cc, t1, t2, eris, kconserv)
    self.Woovv = eris.oovv

    self._made_shared = True
    logger.timer_debug1(self, 'EOM-CCSD shared intermediates', *cput0)
    return self
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    """Set up the auxiliary cell and, optionally, generate the DF integrals.

    Args:
        j_only: when true, only diagonal k-point pairs ``(k, k)`` are put in
            the pair list. ``None`` falls back to ``self._j_only``.
        with_j3c: when true, ``self._make_j3c`` is called and ``self._cderi``
            is pointed at the file the integrals are saved to.
        kpts_band: extra band k-points; they are merged (via ``unique``) into
            ``self.kpts_band``.

    Returns:
        ``self``.
    """
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    kpts = self.kpts
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0, 3))
    else:
        # Band k-points that are not already part of kpts.
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts)) == 0]

    if j_only is None:
        j_only = self._j_only
    if j_only:
        # kband_uniq is an empty Python list when every band k-point is
        # already in kpts; reshape to (0, 3) so that vstack accepts it.
        kall = numpy.vstack([kpts, numpy.reshape(kband_uniq, (-1, 3))])
        kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
    else:
        # Lower-triangular (i, j<=i) pairs of kpts, then (band, kpt) pairs
        # and the diagonal (band, band) pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .',
                            cderi)
        self._cderi = cderi
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        logger.timer_debug1(self, 'j3c', *t1)
    return self
def get_gridss(mol, level=1, gthrd=1e-10):
    """Build a DFT grid and drop points where every weighted AO is negligible.

    A grid point is kept when at least one ``'GTOval'`` value, multiplied by
    the grid weight, is larger than ``gthrd`` or smaller than ``-gthrd``.

    Args:
        mol: molecule object; passed to ``dft.gen_grid.Grids`` and used for
            ``eval_gto`` and logging.
        level: value assigned to ``grids.level`` before ``grids.build()``.
        gthrd: screening threshold on the weighted AO values.

    Returns:
        The ``Grids`` object with ``coords`` and ``weights`` filtered.
    """
    Ktime = (logger.process_clock(), logger.perf_counter())
    grids = dft.gen_grid.Grids(mol)
    grids.level = level
    grids.build()

    wao_v = mol.eval_gto('GTOval', grids.coords)
    wao_v *= grids.weights[:, None]
    mask = (numpy.any(wao_v > gthrd, axis=1) |
            numpy.any(wao_v < -gthrd, axis=1))

    grids.coords = grids.coords[mask]
    grids.weights = grids.weights[mask]
    logger.debug(mol, 'threshold for grids screening %g', gthrd)
    logger.debug(mol, 'number of grids %d', grids.weights.size)
    logger.timer_debug1(mol, "Xg screening", *Ktime)
    return grids
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    """Set up the auxiliary cell and, optionally, generate the DF integrals.

    Args:
        j_only: when true, only diagonal k-point pairs ``(k, k)`` are put in
            the pair list. ``None`` falls back to ``self._j_only``.
        with_j3c: when true, ``self._make_j3c`` is called and ``self._cderi``
            is pointed at the file the integrals are saved to.
        kpts_band: extra band k-points; they are merged (via ``unique``) into
            ``self.kpts_band``.

    Returns:
        ``self``.
    """
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    # Remove duplicated k-points. Duplicated kpts may lead to a buffer
    # located in incore.wrap_int3c larger than necessary. Integral code
    # only fills necessary part of the buffer, leaving some space in the
    # buffer unfilled.
    uniq_idx = unique(self.kpts)[1]
    kpts = numpy.asarray(self.kpts)[uniq_idx]
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0, 3))
    else:
        # Band k-points that are not already part of kpts.
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts)) == 0]

    if j_only is None:
        j_only = self._j_only
    if j_only:
        # kband_uniq is an empty Python list when every band k-point is
        # already in kpts; reshape to (0, 3) so that vstack accepts it.
        kall = numpy.vstack([kpts, numpy.reshape(kband_uniq, (-1, 3))])
        kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
    else:
        # Lower-triangular (i, j<=i) pairs of kpts, then (band, kpt) pairs
        # and the diagonal (band, band) pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .',
                            cderi)
        self._cderi = cderi
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        logger.timer_debug1(self, 'j3c', *t1)
    return self
def make_ea(self):
    """Build the k-point EA intermediates.

    Stores ``Woooo``, ``Wvovv``, ``Wvvvv`` and ``Wvvvo`` on ``self``; each is
    obtained from the ``imd`` function of the same name, called with
    ``self._cc``, the amplitudes, ``self.eris`` and ``self.kconserv``. The
    shared intermediates are built first if ``self._made_shared`` is false.
    Sets ``self.made_ea_imds``.

    Returns:
        ``self``.
    """
    if not self._made_shared:
        self._make_shared()

    cput0 = (logger.process_clock(), logger.perf_counter())

    kconserv = self.kconserv
    t1, t2, eris = self.t1, self.t2, self.eris

    # FIXME DELETE WOOOO
    # 0 or 1 virtuals
    self.Woooo = imd.Woooo(self._cc, t1, t2, eris, kconserv)
    # 3 or 4 virtuals
    self.Wvovv = imd.Wvovv(self._cc, t1, t2, eris, kconserv)
    self.Wvvvv = imd.Wvvvv(self._cc, t1, t2, eris, kconserv)
    self.Wvvvo = imd.Wvvvo(self._cc, t1, t2, eris, kconserv)

    self.made_ea_imds = True
    logger.timer_debug1(self, 'EOM-CCSD EA intermediates', *cput0)
    return self
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    """Set up the auxiliary cell and, optionally, generate the DF integrals.

    Args:
        j_only: when true, only diagonal k-point pairs ``(k, k)`` are put in
            the pair list. ``None`` falls back to ``self._j_only``.
        with_j3c: when true, ``self._make_j3c`` is called and ``self._cderi``
            is pointed at the file the integrals are saved to.
        kpts_band: extra band k-points; they are merged (via ``unique``) into
            ``self.kpts_band``.

    Returns:
        ``self``.
    """
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.check_sanity()
    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
                                     self.exp_to_discard)

    kpts = self.kpts
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0, 3))
    else:
        # Band k-points that are not already part of kpts.
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts)) == 0]

    if j_only is None:
        j_only = self._j_only
    if j_only:
        # kband_uniq is an empty Python list when every band k-point is
        # already in kpts; reshape to (0, 3) so that vstack accepts it.
        kall = numpy.vstack([kpts, numpy.reshape(kband_uniq, (-1, 3))])
        kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
    else:
        # Lower-triangular (i, j<=i) pairs of kpts, then (band, kpt) pairs
        # and the diagonal (band, band) pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    if with_j3c:
        if isinstance(self._cderi_to_save, str):
            cderi = self._cderi_to_save
        else:
            cderi = self._cderi_to_save.name
        if isinstance(self._cderi, str):
            if self._cderi == cderi and os.path.isfile(cderi):
                logger.warn(self, 'DF integrals in %s (specified by '
                            '._cderi) is overwritten by GDF '
                            'initialization. ', cderi)
            else:
                logger.warn(self, 'Value of ._cderi is ignored. '
                            'DF integrals will be saved in file %s .',
                            cderi)
        self._cderi = cderi
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
        logger.timer_debug1(self, 'j3c', *t1)
    return self
def get_gridss(mol, level=1, gthrd=1e-10):
    """Build a DFT grid and drop points where every weighted AO is negligible.

    The grid is scanned in slices of 10000 points. A grid point is kept when
    at least one ``'GTOval'`` value, multiplied by the grid weight, is larger
    than ``gthrd`` or smaller than ``-gthrd``.

    Args:
        mol: molecule object; passed to ``dft.gen_grid.Grids`` and used for
            ``eval_gto`` and logging.
        level: value assigned to ``grids.level`` before ``grids.build()``.
        gthrd: screening threshold on the weighted AO values.

    Returns:
        The ``Grids`` object with ``coords`` and ``weights`` filtered.
    """
    Ktime = (logger.process_clock(), logger.perf_counter())
    grids = dft.gen_grid.Grids(mol)
    grids.level = level
    grids.build()

    ngrids = grids.weights.size
    mask = []
    for p0, p1 in lib.prange(0, ngrids, 10000):
        wao_v = mol.eval_gto('GTOval', grids.coords[p0:p1])
        wao_v *= grids.weights[p0:p1, None]
        mask.append(numpy.any(wao_v > gthrd, axis=1) |
                    numpy.any(wao_v < -gthrd, axis=1))

    mask = numpy.hstack(mask)
    grids.coords = grids.coords[mask]
    grids.weights = grids.weights[mask]
    logger.debug(mol, 'threshold for grids screening %g', gthrd)
    logger.debug(mol, 'number of grids %d', grids.weights.size)
    logger.timer_debug1(mol, "Xg screening", *Ktime)
    return grids
def get_pp(mydf, kpts=None):
    '''Get the periodic pseudopotential nuc-el AO matrix, with G=0 removed.

    The result is the sum of ``get_pp_loc_part1``, ``get_pp_loc_part2`` and
    ``get_pp_nl`` for every requested k-point.

    Args:
        mydf: density-fitting object providing ``cell``; also used as the
            logging handle.
        kpts: k-points, reshaped to ``(-1, 3)``. ``None`` means a single
            zero k-point.

    Returns:
        One entry per k-point, or only the first entry when ``kpts`` is
        ``None`` or has shape ``(3,)``.
    '''
    t0 = (logger.process_clock(), logger.perf_counter())
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))
    nkpts = len(kpts_lst)

    vloc1 = get_pp_loc_part1(mydf, kpts_lst)
    t1 = logger.timer_debug1(mydf, 'get_pp_loc_part1', *t0)
    vloc2 = pseudo.pp_int.get_pp_loc_part2(cell, kpts_lst)
    t1 = logger.timer_debug1(mydf, 'get_pp_loc_part2', *t1)
    vpp = pseudo.pp_int.get_pp_nl(cell, kpts_lst)
    for k in range(nkpts):
        vpp[k] += vloc1[k] + vloc2[k]
    logger.timer_debug1(mydf, 'get_pp_nl', *t1)

    # A single k-point was requested: return the bare matrix.
    if kpts is None or numpy.shape(kpts) == (3,):
        vpp = vpp[0]
    logger.timer(mydf, 'get_pp', *t0)
    return vpp
def build(self, j_only=None, with_j3c=True, kpts_band=None):
    """Set up the auxiliary cell and, optionally, generate the DF integrals.

    Args:
        j_only: when true, only diagonal k-point pairs ``(k, k)`` are put in
            the pair list. ``None`` falls back to ``self._j_only``.
        with_j3c: when true, ``self._make_j3c`` is called.
        kpts_band: extra band k-points; they are merged (via ``unique``) into
            ``self.kpts_band``.

    Returns:
        ``self``.
    """
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis, self.eta)

    kpts = self.kpts
    if self.kpts_band is None:
        kband_uniq = numpy.zeros((0, 3))
    else:
        # Band k-points that are not already part of kpts.
        kband_uniq = [k for k in self.kpts_band if len(member(k, kpts)) == 0]

    if j_only is None:
        j_only = self._j_only
    if j_only:
        # kband_uniq is an empty Python list when every band k-point is
        # already in kpts; reshape to (0, 3) so that vstack accepts it.
        kall = numpy.vstack([kpts, numpy.reshape(kband_uniq, (-1, 3))])
        kptij_lst = numpy.hstack((kall, kall)).reshape(-1, 2, 3)
    else:
        # Lower-triangular (i, j<=i) pairs of kpts, then (band, kpt) pairs
        # and the diagonal (band, band) pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
        kptij_lst.extend([(ki, ki) for ki in kband_uniq])
        kptij_lst = numpy.asarray(kptij_lst)

    # Without a user-supplied path, fall back to the file held in
    # _cderi_file (either a path string or an object with a .name).
    if not isinstance(self._cderi, str):
        if isinstance(self._cderi_file, str):
            self._cderi = self._cderi_file
        else:
            self._cderi = self._cderi_file.name

    if with_j3c:
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst)
        logger.timer_debug1(self, 'j3c', *t1)
    return self
def build(self, j_only=False, with_j3c=True, kpts_band=None):
    """Set up the auxiliary cell and, optionally, generate the DF integrals.

    Args:
        j_only: when true, only diagonal k-point pairs ``(k, k)`` are put in
            the pair list. The value is stored in ``self._j_only``.
        with_j3c: when true, ``self._make_j3c`` is called.
        kpts_band: extra band k-points; they are merged (via ``unique``) into
            ``self.kpts_band``.

    Returns:
        ``self``.
    """
    if self.kpts_band is not None:
        self.kpts_band = numpy.reshape(self.kpts_band, (-1, 3))
    if kpts_band is not None:
        kpts_band = numpy.reshape(kpts_band, (-1, 3))
        if self.kpts_band is None:
            self.kpts_band = kpts_band
        else:
            self.kpts_band = unique(
                numpy.vstack((self.kpts_band, kpts_band)))[0]

    self.dump_flags()

    self.auxcell = make_modrho_basis(self.cell, self.auxbasis, self.eta)

    # The k-point set is the de-duplicated union of kpts and the band k-points.
    if self.kpts_band is None:
        kpts = unique(self.kpts)[0]
    else:
        kpts = unique(numpy.vstack((self.kpts, self.kpts_band)))[0]

    self._j_only = j_only
    if j_only:
        kptij_lst = numpy.hstack((kpts, kpts)).reshape(-1, 2, 3)
    else:
        # Lower-triangular (i, j<=i) pairs.
        kptij_lst = [(ki, kpts[j])
                     for i, ki in enumerate(kpts) for j in range(i + 1)]
        kptij_lst = numpy.asarray(kptij_lst)

    # Without a user-supplied path, fall back to the file held in
    # _cderi_file (either a path string or an object with a .name).
    if not isinstance(self._cderi, str):
        if isinstance(self._cderi_file, str):
            self._cderi = self._cderi_file
        else:
            self._cderi = self._cderi_file.name

    if with_j3c:
        t1 = (logger.process_clock(), logger.perf_counter())
        self._make_j3c(self.cell, self.auxcell, kptij_lst)
        logger.timer_debug1(self, 'j3c', *t1)
    return self
def _add_vvVV(mycc, t1, t2ab, eris, out=None):
    '''Ht2 = np.einsum('iJcD,acBD->iJaB', t2ab, vvVV)
    without using symmetry in t2ab or Ht2

    Args:
        mycc: CC object; ``direct``, ``stdout``, ``verbose`` and, when
            ``eris`` carries no ``mo_coeff``, ``mo_coeff`` /
            ``get_frozen_mask()`` are read from it.
        t1: when not ``None``, ``t2ab`` is first replaced by
            ``make_tau_ab(t2ab, t1, t1)``.
        t2ab: 4-index array unpacked as ``(nocca, noccb, nvira, nvirb)``.
        eris: integral object providing ``_contract_vvVV_t2``.
        out: forwarded to ``eris._contract_vvVV_t2``.

    Returns:
        Array with the shape of ``t2ab`` in the AO-direct branch; otherwise
        whatever ``eris._contract_vvVV_t2`` returns. A zero array is
        returned when ``t2ab`` is empty.
    '''
    time0 = logger.process_clock(), logger.perf_counter()
    if t2ab.size == 0:
        return np.zeros_like(t2ab)
    if t1 is not None:
        t2ab = make_tau_ab(t2ab, t1, t1)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocca, noccb, nvira, nvirb = t2ab.shape

    if mycc.direct:  # AO direct CCSD
        if getattr(eris, 'mo_coeff', None) is not None:
            mo_a, mo_b = eris.mo_coeff
        else:
            moidxa, moidxb = mycc.get_frozen_mask()
            mo_a = mycc.mo_coeff[0][:, moidxa]
            mo_b = mycc.mo_coeff[1][:, moidxb]
        # Note tensor t2ab may be t2bbab from eom_uccsd code.  In that
        # particular case, nocca, noccb do not equal to the actual number of
        # alpha/beta occupied orbitals. orbva and orbvb cannot be indexed as
        # mo_a[:,nocca:] and mo_b[:,noccb:]
        orbva = mo_a[:, -nvira:]
        orbvb = mo_b[:, -nvirb:]
        tau = lib.einsum('ijab,pa->ijpb', t2ab, orbva)
        tau = lib.einsum('ijab,pb->ijap', tau, orbvb)
        time0 = logger.timer_debug1(mycc, 'vvvv-tau mo2ao', *time0)
        buf = eris._contract_vvVV_t2(mycc, tau, mycc.direct, out, log)
        mo = np.asarray(np.hstack((orbva, orbvb)), order='F')
        Ht2 = _ao2mo.nr_e2(buf.reshape(nocca*noccb, -1), mo.conj(),
                           (0, nvira, nvira, nvira+nvirb), 's1', 's1')
        return Ht2.reshape(t2ab.shape)
    else:
        return eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, out, log)
def _add_vvVV(mycc, t1, t2ab, eris, out=None):
    '''Ht2 = np.einsum('iJcD,acBD->iJaB', t2ab, vvVV)
    without using symmetry in t2ab or Ht2

    Args:
        mycc: CC object; ``direct``, ``stdout``, ``verbose`` and, when
            ``eris`` carries no ``mo_coeff``, ``mo_coeff`` /
            ``get_frozen_mask()`` are read from it.
        t1: when not ``None``, ``t2ab`` is first replaced by
            ``make_tau_ab(t2ab, t1, t1)``.
        t2ab: 4-index array unpacked as ``(nocca, noccb, nvira, nvirb)``.
        eris: integral object providing ``_contract_vvVV_t2``.
        out: forwarded to ``eris._contract_vvVV_t2``.

    Returns:
        Array with the shape of ``t2ab`` in the AO-direct branch; otherwise
        whatever ``eris._contract_vvVV_t2`` returns. A zero array is
        returned when ``t2ab`` is empty.
    '''
    time0 = logger.process_clock(), logger.perf_counter()
    if t2ab.size == 0:
        return np.zeros_like(t2ab)
    if t1 is not None:
        t2ab = make_tau_ab(t2ab, t1, t1)

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocca, noccb, nvira, nvirb = t2ab.shape

    if mycc.direct:  # AO direct CCSD
        if getattr(eris, 'mo_coeff', None) is not None:
            mo_a, mo_b = eris.mo_coeff
        else:
            moidxa, moidxb = mycc.get_frozen_mask()
            mo_a = mycc.mo_coeff[0][:, moidxa]
            mo_b = mycc.mo_coeff[1][:, moidxb]
        # Note tensor t2ab may be t2bbab from eom_uccsd code.  In that
        # particular case, nocca, noccb do not equal to the actual number of
        # alpha/beta occupied orbitals. orbva and orbvb cannot be indexed as
        # mo_a[:,nocca:] and mo_b[:,noccb:]
        orbva = mo_a[:, -nvira:]
        orbvb = mo_b[:, -nvirb:]
        tau = lib.einsum('ijab,pa->ijpb', t2ab, orbva)
        tau = lib.einsum('ijab,pb->ijap', tau, orbvb)
        time0 = logger.timer_debug1(mycc, 'vvvv-tau mo2ao', *time0)
        buf = eris._contract_vvVV_t2(mycc, tau, mycc.direct, out, log)
        mo = np.asarray(np.hstack((orbva, orbvb)), order='F')
        Ht2 = _ao2mo.nr_e2(buf.reshape(nocca*noccb, -1), mo.conj(),
                           (0, nvira, nvira, nvira+nvirb), 's1', 's1')
        return Ht2.reshape(t2ab.shape)
    else:
        return eris._contract_vvVV_t2(mycc, t2ab, mycc.direct, out, log)
def add_wvvVV_(self, t2, eris, t2new_tril):
    """Accumulate the t2 * vvvv contraction into ``t2new_tril``.

    Reference expression (full-tensor form, as in the inline notes)::

        t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)

    Only occupied pairs with ``j <= i`` are processed; they are packed along
    the leading axis of ``tau``.

    Two code paths:

    * ``self.direct`` true: ``tau`` is transformed with ``_ao2mo.nr_e2``
      using the ``mo_coeff[:, nocc:]`` block, the integrals are generated
      shell block by shell block with ``gto.moleintor.getints2e``, and the
      accumulated result is transformed back with ``mo_coeff`` and added to
      ``t2new_tril``.
    * otherwise: rows of ``eris.vvvv`` are expanded with ``lib.unpack_tril``
      and contracted through ``async_do``.

    Args:
        t2: array with ``t2.shape[1:3] == (nocc, nvir)``, sliced as
            ``t2[i, :i+1]``.
        eris: integral object; only ``eris.vvvv`` is read, and only in the
            non-direct path.
        t2new_tril: output array, updated in place.

    Returns:
        ``t2new_tril``.
    """
    time0 = logger.process_clock(), logger.perf_counter()
    nocc, nvir = t2.shape[1:3]

    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
        # Off-diagonal integral block: contributes to both the [j0:j1] and
        # the [i0:i1] slices of the output (see the two einsum notes).
        nao = tau.shape[-1]
        ic = i1 - i0
        jc = j1 - j0
        #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
               tau.reshape(-1, nao*nao), eri.reshape(-1, jc*nao),
               t2new_tril.reshape(-1, nao*nao), 1, 1, i0*nao, 0, j0*nao)

        #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
        _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
               tau.reshape(-1, nao*nao), eri.reshape(-1, jc*nao),
               t2new_tril.reshape(-1, nao*nao), 1, 1, j0*nao, 0, i0*nao)

    def contract_tril_(t2new_tril, tau, eri, a0, a):
        # Diagonal block, processed one row ``a`` at a time.
        nvir = tau.shape[-1]
        #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
               tau.reshape(-1, nvir*nvir), eri.reshape(-1, nvir),
               t2new_tril.reshape(-1, nvir*nvir),
               1, 1, a0*nvir, 0, a*nvir)

        #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
        if a > a0:
            _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                   tau.reshape(-1, nvir*nvir), eri.reshape(-1, nvir),
                   t2new_tril.reshape(-1, nvir*nvir),
                   1, 1, a*nvir, 0, a0*nvir)

    if self.direct:  # AO-direct CCSD
        mol = self.mol
        nao, nmo = self.mo_coeff.shape
        aos = numpy.asarray(self.mo_coeff[:, nocc:].T, order='F')
        outbuf = numpy.empty((nocc*(nocc+1)//2, nao, nao))
        # tau initially lives in the memory of outbuf; nr_e2 below returns
        # the transformed array, after which outbuf is zeroed and reused as
        # the accumulator.
        tau = numpy.ndarray((nocc*(nocc+1)//2, nvir, nvir), buffer=outbuf)
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = t2[i, :i+1]
            p0 += i + 1
        tau = _ao2mo.nr_e2(tau.reshape(-1, nvir**2), aos, (0, nao, 0, nao),
                           's1', 's1')
        tau = tau.reshape(-1, nao, nao)
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        outbuf[:] = 0
        ao_loc = mol.ao_loc_nr()
        max_memory = max(0, self.max_memory - lib.current_memory()[0])
        dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
        sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
        dmax = max(x[2] for x in sh_ranges)
        eribuf = numpy.empty((dmax, dmax, nao, nao))
        loadbuf = numpy.empty((dmax, dmax, nao, nao))
        fint = gto.moleintor.getints2e

        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            # Off-diagonal shell blocks (j < i).
            for jsh0, jsh1, nj in sh_ranges[:ip]:
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0, ish1, jsh0, jsh1), aosym='s2kl',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                tmp = numpy.ndarray((i1-i0, nao, j1-j0, nao), buffer=loadbuf)
                _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                       eri.ctypes.data_as(ctypes.c_void_p),
                                       (ctypes.c_int*4)(i0, i1, j0, j1),
                                       ctypes.c_int(nao))
                contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                time0 = logger.timer_debug1(
                    self,
                    'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time0)

            # Diagonal shell block.
            eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                       shls_slice=(ish0, ish1, ish0, ish1), aosym='s4',
                       ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
            i0, i1 = ao_loc[ish0], ao_loc[ish1]
            for i in range(i1-i0):
                p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                contract_tril_(outbuf, tau, tmp, i0, i0+i)
            time0 = logger.timer_debug1(
                self, 'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, ish0, ish1),
                *time0)

        # Drop the references to the large work buffers.
        eribuf = loadbuf = eri = tmp = None
        mo = numpy.asarray(self.mo_coeff, order='F')
        tmp = _ao2mo.nr_e2(outbuf, mo, (nocc, nmo, nocc, nmo), 's1', 's1',
                           out=tau)
        t2new_tril += tmp.reshape(-1, nvir, nvir)

    else:
        tau = numpy.empty((nocc*(nocc+1)//2, nvir, nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = t2[i, :i+1]
            p0 += i + 1

        p0 = 0
        outbuf = numpy.empty((nvir, nvir, nvir))
        outbuf1 = numpy.empty((nvir, nvir, nvir))
        handler = None
        for a in range(nvir):
            buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
            # Swap the two buffers -- presumably so that the next unpack does
            # not overwrite the data the task just submitted is reading.
            outbuf, outbuf1 = outbuf1, outbuf
            handler = async_do(handler, contract_tril_, t2new_tril, tau, buf,
                               0, a)
            p0 += a + 1
            time0 = logger.timer_debug1(self, 'vvvv %d' % a, *time0)
        # handler stays None when nvir == 0 (the loop never ran).
        if handler is not None:
            handler.join()
    return t2new_tril
def get_j(dfobj, dm, hermi=1, direct_scf_tol=1e-13):
    """Compute the density-fitted Coulomb matrix for ``dm``.

    Three steps, as outlined in the inline notes: contract the 3-centre
    integrals with ``dm`` to get ``jaux``, solve the 2-centre metric
    equation for ``rho``, then contract the 3-centre integrals with ``rho``.

    On the first call (``dfobj._vjopt is None``) the auxiliary molecule, the
    screening data and the factorised metric are set up and cached on
    ``dfobj`` (``auxmol`` and ``_vjopt``).

    Args:
        dfobj: density-fitting object providing ``mol``, ``auxbasis`` and
            the ``_vjopt`` cache slot.
        dm: density matrix or stack of matrices; reshaped to
            ``(-1, nao, nao)`` internally.
        hermi: not read by this implementation.
        direct_scf_tol: stored as ``opt.direct_scf_tol`` when the optimizer
            is first created.

    Returns:
        Array with the same shape as ``dm``.
    """
    from pyscf.scf import _vhf
    from pyscf.scf import jk
    from pyscf.df import addons
    t0 = t1 = (logger.process_clock(), logger.perf_counter())

    mol = dfobj.mol
    if dfobj._vjopt is None:
        dfobj.auxmol = auxmol = addons.make_auxmol(mol, dfobj.auxbasis)
        opt = _vhf.VHFOpt(mol, 'int3c2e', 'CVHFnr3c2e_schwarz_cond')
        opt.direct_scf_tol = direct_scf_tol

        # q_cond part 1: the regular int2e (ij|ij) for mol's basis
        opt.init_cvhf_direct(mol, 'int2e', 'CVHFsetnr_direct_scf')
        mol_q_cond = lib.frompointer(opt._this.contents.q_cond, mol.nbas**2)

        # Update q_cond to include the 2e-integrals (auxmol|auxmol)
        j2c = auxmol.intor('int2c2e', hermi=1)
        j2c_diag = numpy.sqrt(abs(j2c.diagonal()))
        aux_loc = auxmol.ao_loc
        aux_q_cond = [j2c_diag[i0:i1].max()
                      for i0, i1 in zip(aux_loc[:-1], aux_loc[1:])]
        q_cond = numpy.hstack((mol_q_cond, aux_q_cond))
        fsetqcond = _vhf.libcvhf.CVHFset_q_cond
        fsetqcond(opt._this, q_cond.ctypes.data_as(ctypes.c_void_p),
                  ctypes.c_int(q_cond.size))

        # Prefer a Cholesky factorisation of the metric; keep the plain
        # matrix when the factorisation fails.
        try:
            opt.j2c = j2c = scipy.linalg.cho_factor(j2c, lower=True)
            opt.j2c_type = 'cd'
        except scipy.linalg.LinAlgError:
            opt.j2c = j2c
            opt.j2c_type = 'regular'

        # jk.get_jk function supports 4-index integrals. Use bas_placeholder
        # (l=0, nctr=1, 1 function) to hold the last index.
        bas_placeholder = numpy.array([0, 0, 1, 1, 0, 0, 0, 0],
                                      dtype=numpy.int32)
        fakemol = mol + auxmol
        fakemol._bas = numpy.vstack((fakemol._bas, bas_placeholder))
        opt.fakemol = fakemol
        dfobj._vjopt = opt
        t1 = logger.timer_debug1(dfobj, 'df-vj init_direct_scf', *t1)

    opt = dfobj._vjopt
    fakemol = opt.fakemol
    dm = numpy.asarray(dm, order='C')
    dm_shape = dm.shape
    nao = dm_shape[-1]
    dm = dm.reshape(-1, nao, nao)
    n_dm = dm.shape[0]

    # First compute the density in auxiliary basis
    # j3c = fauxe2(mol, auxmol)
    # jaux = numpy.einsum('ijk,ji->k', j3c, dm)
    # rho = numpy.linalg.solve(auxmol.intor('int2c2e'), jaux)
    nbas = mol.nbas
    nbas1 = mol.nbas + dfobj.auxmol.nbas
    shls_slice = (0, nbas, 0, nbas, nbas, nbas1, nbas1, nbas1+1)
    with lib.temporary_env(opt, prescreen='CVHFnr3c2e_vj_pass1_prescreen',
                           _dmcondname='CVHFsetnr_direct_scf_dm'):
        jaux = jk.get_jk(fakemol, dm, ['ijkl,ji->kl']*n_dm, 'int3c2e',
                         aosym='s2ij', hermi=0, shls_slice=shls_slice,
                         vhfopt=opt)
    # remove the index corresponding to bas_placeholder
    jaux = numpy.array(jaux)[:, :, 0]
    t1 = logger.timer_debug1(dfobj, 'df-vj pass 1', *t1)

    if opt.j2c_type == 'cd':
        rho = scipy.linalg.cho_solve(opt.j2c, jaux.T)
    else:
        rho = scipy.linalg.solve(opt.j2c, jaux.T)
    # transform rho to shape (:,1,naux), to adapt to 3c2e integrals (ij|k)
    rho = rho.T[:, numpy.newaxis, :]
    t1 = logger.timer_debug1(dfobj, 'df-vj solve ', *t1)

    # Next compute the Coulomb matrix
    # j3c = fauxe2(mol, auxmol)
    # vj = numpy.einsum('ijk,k->ij', j3c, rho)
    with lib.temporary_env(opt, prescreen='CVHFnr3c2e_vj_pass2_prescreen',
                           _dmcondname=None):
        # CVHFnr3c2e_vj_pass2_prescreen requires custom dm_cond
        aux_loc = dfobj.auxmol.ao_loc
        dm_cond = [abs(rho[:, :, i0:i1]).max()
                   for i0, i1 in zip(aux_loc[:-1], aux_loc[1:])]
        dm_cond = numpy.array(dm_cond)
        fsetcond = _vhf.libcvhf.CVHFset_dm_cond
        fsetcond(opt._this, dm_cond.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(dm_cond.size))

        vj = jk.get_jk(fakemol, rho, ['ijkl,lk->ij']*n_dm, 'int3c2e',
                       aosym='s2ij', hermi=1, shls_slice=shls_slice,
                       vhfopt=opt)

    logger.timer_debug1(dfobj, 'df-vj pass 2', *t1)
    logger.timer(dfobj, 'df-vj', *t0)
    return numpy.asarray(vj).reshape(dm_shape)
def add_wvvVV_(self, t2, eris, t2new_tril):
    """Accumulate the t2 * vvvv contraction into ``t2new_tril``.

    Reference expression (full-tensor form, as in the inline notes)::

        t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)

    Only occupied pairs with ``j <= i`` are processed; they are packed along
    the leading axis of ``tau``.

    Two code paths:

    * ``self.direct`` true: ``tau`` is transformed with ``_ao2mo.nr_e2``
      using the ``mo_coeff[:, nocc:]`` block, the integrals are generated
      shell block by shell block with ``gto.moleintor.getints2e``, and the
      accumulated result is transformed back with ``mo_coeff`` and added to
      ``t2new_tril``.
    * otherwise: rows of ``eris.vvvv`` are expanded with ``lib.unpack_tril``
      and contracted through ``async_do``.

    Args:
        t2: array with ``t2.shape[1:3] == (nocc, nvir)``, sliced as
            ``t2[i, :i+1]``.
        eris: integral object; only ``eris.vvvv`` is read, and only in the
            non-direct path.
        t2new_tril: output array, updated in place.

    Returns:
        ``t2new_tril``.
    """
    time0 = logger.process_clock(), logger.perf_counter()
    nocc, nvir = t2.shape[1:3]

    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
        # Off-diagonal integral block: contributes to both the [j0:j1] and
        # the [i0:i1] slices of the output (see the two einsum notes).
        nao = tau.shape[-1]
        ic = i1 - i0
        jc = j1 - j0
        #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
               tau.reshape(-1, nao*nao), eri.reshape(-1, jc*nao),
               t2new_tril.reshape(-1, nao*nao), 1, 1, i0*nao, 0, j0*nao)

        #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
        _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
               tau.reshape(-1, nao*nao), eri.reshape(-1, jc*nao),
               t2new_tril.reshape(-1, nao*nao), 1, 1, j0*nao, 0, i0*nao)

    def contract_tril_(t2new_tril, tau, eri, a0, a):
        # Diagonal block, processed one row ``a`` at a time.
        nvir = tau.shape[-1]
        #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
               tau.reshape(-1, nvir*nvir), eri.reshape(-1, nvir),
               t2new_tril.reshape(-1, nvir*nvir),
               1, 1, a0*nvir, 0, a*nvir)

        #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
        if a > a0:
            _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                   tau.reshape(-1, nvir*nvir), eri.reshape(-1, nvir),
                   t2new_tril.reshape(-1, nvir*nvir),
                   1, 1, a*nvir, 0, a0*nvir)

    if self.direct:  # AO-direct CCSD
        mol = self.mol
        nao, nmo = self.mo_coeff.shape
        aos = numpy.asarray(self.mo_coeff[:, nocc:].T, order='F')
        outbuf = numpy.empty((nocc*(nocc+1)//2, nao, nao))
        # tau initially lives in the memory of outbuf; nr_e2 below returns
        # the transformed array, after which outbuf is zeroed and reused as
        # the accumulator.
        tau = numpy.ndarray((nocc*(nocc+1)//2, nvir, nvir), buffer=outbuf)
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = t2[i, :i+1]
            p0 += i + 1
        tau = _ao2mo.nr_e2(tau.reshape(-1, nvir**2), aos, (0, nao, 0, nao),
                           's1', 's1')
        tau = tau.reshape(-1, nao, nao)
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        outbuf[:] = 0
        ao_loc = mol.ao_loc_nr()
        max_memory = max(0, self.max_memory - lib.current_memory()[0])
        dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
        sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
        dmax = max(x[2] for x in sh_ranges)
        eribuf = numpy.empty((dmax, dmax, nao, nao))
        loadbuf = numpy.empty((dmax, dmax, nao, nao))
        fint = gto.moleintor.getints2e

        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            # Off-diagonal shell blocks (j < i).
            for jsh0, jsh1, nj in sh_ranges[:ip]:
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0, ish1, jsh0, jsh1), aosym='s2kl',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                tmp = numpy.ndarray((i1-i0, nao, j1-j0, nao), buffer=loadbuf)
                _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                       eri.ctypes.data_as(ctypes.c_void_p),
                                       (ctypes.c_int*4)(i0, i1, j0, j1),
                                       ctypes.c_int(nao))
                contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                time0 = logger.timer_debug1(
                    self,
                    'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time0)

            # Diagonal shell block.
            eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                       shls_slice=(ish0, ish1, ish0, ish1), aosym='s4',
                       ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
            i0, i1 = ao_loc[ish0], ao_loc[ish1]
            for i in range(i1-i0):
                p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                contract_tril_(outbuf, tau, tmp, i0, i0+i)
            time0 = logger.timer_debug1(
                self, 'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, ish0, ish1),
                *time0)

        # Drop the references to the large work buffers.
        eribuf = loadbuf = eri = tmp = None
        mo = numpy.asarray(self.mo_coeff, order='F')
        tmp = _ao2mo.nr_e2(outbuf, mo, (nocc, nmo, nocc, nmo), 's1', 's1',
                           out=tau)
        t2new_tril += tmp.reshape(-1, nvir, nvir)

    else:
        tau = numpy.empty((nocc*(nocc+1)//2, nvir, nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = t2[i, :i+1]
            p0 += i + 1

        p0 = 0
        outbuf = numpy.empty((nvir, nvir, nvir))
        outbuf1 = numpy.empty((nvir, nvir, nvir))
        handler = None
        for a in range(nvir):
            buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
            # Swap the two buffers -- presumably so that the next unpack does
            # not overwrite the data the task just submitted is reading.
            outbuf, outbuf1 = outbuf1, outbuf
            handler = async_do(handler, contract_tril_, t2new_tril, tau, buf,
                               0, a)
            p0 += a + 1
            time0 = logger.timer_debug1(self, 'vvvv %d' % a, *time0)
        # handler stays None when nvir == 0 (the loop never ran).
        if handler is not None:
            handler.join()
    return t2new_tril
def get_t3p2_imds(mycc, t1, t2, eris=None, t3p2_ip_out=None, t3p2_ea_out=None):
    """Build the T3[2]-corrected amplitudes and W intermediates over k-points.

    For a description of arguments, see `get_t3p2_imds_slow` in the
    corresponding `kintermediates.py`.

    Args:
        mycc: coupled-cluster object.  This function reads `_scf.cell`,
            `kpts`, `khelper.kconserv` and `max_memory` from it and calls
            its `ao2mo()` and `energy()` methods.
        t1: singles amplitudes; `t1.shape` is unpacked as
            (nkpts, nocc, nvir).
        t2: doubles amplitudes; added at the end to an array of shape
            (nkpts, nkpts, nkpts, nocc, nocc, nvir, nvir).
        eris: integral container.  When None it is built with
            `mycc.ao2mo()`.
        t3p2_ip_out: optional array the "Wovoo" contribution is accumulated
            into (in place, with `+=`).  When None a zero array of shape
            (nkpts, nkpts, nkpts, nocc, nvir, nocc, nocc) is allocated.
        t3p2_ea_out: optional array the "Wvvoo" contribution is accumulated
            into (in place, with `-=`).  When None a zero array of shape
            (nkpts, nkpts, nkpts, nvir, nvir, nvir, nocc) is allocated.

    Returns:
        Tuple `(delta_ccsd_energy, pt1, pt2, Wmcik, Wacek)` where
        `delta_ccsd_energy` is `mycc.energy(pt1, pt2, eris)` minus
        `mycc.energy(t1, t2, eris)`, `pt1`/`pt2` are the corrected
        amplitudes (correction plus `t1`/`t2`), and `Wmcik`/`Wacek` are the
        (possibly caller-supplied) output arrays described above.
    """
    from pyscf.pbc.cc.kccsd_t_rhf import _get_epqr
    # cpu0 times the whole routine, cpu1 the individual stages.
    cpu1 = cpu0 = (logger.process_clock(), logger.perf_counter())
    if eris is None:
        eris = mycc.ao2mo()
    fock = eris.fock
    nkpts, nocc, nvir = t1.shape
    cell = mycc._scf.cell
    kpts = mycc.kpts
    kconserv = mycc.khelper.kconserv
    dtype = np.result_type(t1, t2)

    # Occupied-virtual block of the Fock matrix at every k-point.
    fov = fock[:, :nocc, nocc:]
    #foo = np.asarray([fock[ikpt, :nocc, :nocc].diagonal() for ikpt in range(nkpts)])
    #fvv = np.asarray([fock[ikpt, nocc:, nocc:].diagonal() for ikpt in range(nkpts)])
    mo_energy_occ = np.array(
        [eris.mo_energy[ki][:nocc] for ki in range(nkpts)])
    mo_energy_vir = np.array(
        [eris.mo_energy[ki][nocc:] for ki in range(nkpts)])

    mo_e_o = mo_energy_occ
    mo_e_v = mo_energy_vir

    # Reference energy; subtracted from the corrected energy at the end.
    ccsd_energy = mycc.energy(t1, t2, eris)

    if t3p2_ip_out is None:
        t3p2_ip_out = np.zeros((nkpts, nkpts, nkpts, nocc, nvir, nocc, nocc),
                               dtype=dtype)
    Wmcik = t3p2_ip_out

    if t3p2_ea_out is None:
        t3p2_ea_out = np.zeros((nkpts, nkpts, nkpts, nvir, nvir, nvir, nocc),
                               dtype=dtype)
    Wacek = t3p2_ea_out

    # Create necessary temporary eris for fast read
    from pyscf.pbc.cc.kccsd_t_rhf import create_t3_eris, get_data_slices
    # NOTE(review): feri_tmp is not referenced again below; presumably it is
    # held so the temporary storage behind the returned arrays stays alive --
    # confirm against create_t3_eris.
    feri_tmp, t2T, eris_vvop, eris_vooo_C = create_t3_eris(
        mycc, kconserv, [eris.vovv, eris.oovv, eris.ooov, t2])
    #t1T = np.array([x.T for x in t1], dtype=np.complex, order='C')
    #fvo = np.array([x.T for x in fov], dtype=np.complex, order='C')
    cpu1 = logger.timer_debug1(mycc, 'CCSD(T) tmp eri creation', *cpu1)

    def get_w(ki, kj, kk, ka, kb, kc, a0, a1, b0, b1, c0, c1):
        '''Wijkabc intermediate as described in Scuseria paper before Pijkabc acts

        Function copied for `kccsd_t_rhf.py`'''
        km = kconserv[kc, kk, kb]
        kf = kconserv[kk, kc, kj]
        out = einsum('cfjk,abif->abcijk', t2T[kc, kf, kj, c0:c1, :, :, :],
                     eris_vvop[ka, kb, ki, a0:a1, b0:b1, :, nocc:])
        out = out - einsum('cbmk,aijm->abcijk', t2T[kc, kb, km, c0:c1,
                                                    b0:b1, :, :],
                           eris_vooo_C[ka, ki, kj, a0:a1, :, :, :])
        return out

    def get_permuted_w(ki, kj, kk, ka, kb, kc, orb_indices):
        '''Pijkabc operating on Wijkabc intermediate as described in Scuseria paper

        Function copied for `kccsd_t_rhf.py`'''
        a0, a1, b0, b1, c0, c1 = orb_indices
        # Sum of get_w over the six simultaneous permutations of the
        # (k-point, orbital-block) triples; each term is transposed back to
        # the 'abcijk' axis order of the first term.
        out = get_w(ki, kj, kk, ka, kb, kc, a0, a1, b0, b1, c0, c1)
        out = out + get_w(kj, kk, ki, kb, kc, ka, b0, b1, c0, c1, a0,
                          a1).transpose(2, 0, 1, 5, 3, 4)
        out = out + get_w(kk, ki, kj, kc, ka, kb, c0, c1, a0, a1, b0,
                          b1).transpose(1, 2, 0, 4, 5, 3)
        out = out + get_w(ki, kk, kj, ka, kc, kb, a0, a1, c0, c1, b0,
                          b1).transpose(0, 2, 1, 3, 5, 4)
        out = out + get_w(kk, kj, ki, kc, kb, ka, c0, c1, b0, b1, a0,
                          a1).transpose(2, 1, 0, 5, 4, 3)
        out = out + get_w(kj, ki, kk, kb, ka, kc, b0, b1, a0, a1, c0,
                          c1).transpose(1, 0, 2, 4, 3, 5)
        return out

    def get_data(kpt_indices):
        # NOTE(review): this helper is only referenced from a commented-out
        # call below; it reads `task` from the enclosing loop.
        idx_args = get_data_slices(kpt_indices, task, kconserv)
        vvop_indices, vooo_indices, t2T_vvop_indices, t2T_vooo_indices = idx_args
        vvop_data = [eris_vvop[tuple(x)] for x in vvop_indices]
        vooo_data = [eris_vooo_C[tuple(x)] for x in vooo_indices]
        t2T_vvop_data = [t2T[tuple(x)] for x in t2T_vvop_indices]
        t2T_vooo_data = [t2T[tuple(x)] for x in t2T_vooo_indices]
        data = [vvop_data, vooo_data, t2T_vvop_data, t2T_vooo_data]
        return data

    def add_and_permute(kpt_indices, orb_indices, data):
        '''Performs permutation and addition of t3 temporary arrays.'''
        # The unpacked indices are not used in the expression below.
        ki, kj, kk, ka, kb, kc = kpt_indices
        a0, a1, b0, b1, c0, c1 = orb_indices
        tmp_t3Tv_ijk = np.asarray(data[0], dtype=dtype, order='C')
        tmp_t3Tv_jik = np.asarray(data[1], dtype=dtype, order='C')
        tmp_t3Tv_kji = np.asarray(data[2], dtype=dtype, order='C')
        #out_ijk = np.empty(data[0].shape, dtype=dtype, order='C')

        #drv = _ccsd.libcc.MPICCadd_and_permute_t3T
        #drv(ctypes.c_int(nocc), ctypes.c_int(nvir),
        #    ctypes.c_int(0),
        #    out_ijk.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_ijk.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_jik.ctypes.data_as(ctypes.c_void_p),
        #    tmp_t3Tv_kji.ctypes.data_as(ctypes.c_void_p),
        #    mo_offset.ctypes.data_as(ctypes.c_void_p),
        #    slices.ctypes.data_as(ctypes.c_void_p))
        return (2. * tmp_t3Tv_ijk -
                tmp_t3Tv_jik.transpose(0, 1, 2, 4, 3, 5) -
                tmp_t3Tv_kji.transpose(0, 1, 2, 5, 4, 3))
        #return out_ijk

    # Get location of padded elements in occupied and virtual space
    nonzero_opadding, nonzero_vpadding = padding_k_idx(mycc, kind="split")

    mem_now = lib.current_memory()[0]
    max_memory = max(0, mycc.max_memory - mem_now)
    blkmin = 4
    # temporary t3 array is size:  nkpts**3 * blksize**3 * nocc**3 * 16
    vir_blksize = min(
        nvir,
        max(blkmin,
            int((max_memory * .9e6 / 16 / nocc**3 / nkpts**3)**(1. / 3))))
    tasks = []
    logger.debug(mycc, 'max_memory %d MB (%d MB in use)', max_memory, mem_now)
    logger.debug(mycc, 'virtual blksize = %d (nvir = %d)', vir_blksize, nvir)
    # One task per (a, b, c) triple of virtual-orbital blocks.
    for a0, a1 in lib.prange(0, nvir, vir_blksize):
        for b0, b1 in lib.prange(0, nvir, vir_blksize):
            for c0, c1 in lib.prange(0, nvir, vir_blksize):
                tasks.append((a0, a1, b0, b1, c0, c1))

    # NOTE(review): eaa is filled here but not read anywhere below.
    eaa = []
    for ka in range(nkpts):
        eaa.append(mo_e_o[ka][:, None] - mo_e_v[ka][None, :])

    pt1 = np.zeros((nkpts, nocc, nvir), dtype=dtype)
    pt2 = np.zeros((nkpts, nkpts, nkpts, nocc, nocc, nvir, nvir), dtype=dtype)
    for ka, kb in product(range(nkpts), repeat=2):
        for task_id, task in enumerate(tasks):
            cput2 = (logger.process_clock(), logger.perf_counter())
            a0, a1, b0, b1, c0, c1 = task
            # First pass: store the permuted W for every (ki, kj, kk) so the
            # second pass can read the (kj, ki, kk) and (kk, kj, ki) entries.
            my_permuted_w = np.zeros(
                (nkpts, ) * 3 + (a1 - a0, b1 - b0, c1 - c0) + (nocc, ) * 3,
                dtype=dtype)

            for ki, kj, kk in product(range(nkpts), repeat=3):
                # Find momentum conservation condition for triples
                # amplitude t3ijkabc
                kc = kpts_helper.get_kconserv3(cell, kpts,
                                               [ki, kj, kk, ka, kb])

                kpt_indices = [ki, kj, kk, ka, kb, kc]
                #data = get_data(kpt_indices)
                my_permuted_w[ki, kj, kk] = get_permuted_w(
                    ki, kj, kk, ka, kb, kc, task)

            for ki, kj, kk in product(range(nkpts), repeat=3):
                # eigenvalue denominator: e(i) + e(j) + e(k)
                eijk = _get_epqr([0, nocc, ki, mo_e_o, nonzero_opadding],
                                 [0, nocc, kj, mo_e_o, nonzero_opadding],
                                 [0, nocc, kk, mo_e_o, nonzero_opadding])

                # Find momentum conservation condition for triples
                # amplitude t3ijkabc
                kc = kpts_helper.get_kconserv3(cell, kpts,
                                               [ki, kj, kk, ka, kb])
                eabc = _get_epqr([a0, a1, ka, mo_e_v, nonzero_vpadding],
                                 [b0, b1, kb, mo_e_v, nonzero_vpadding],
                                 [c0, c1, kc, mo_e_v, nonzero_vpadding],
                                 fac=[-1., -1., -1.])

                kpt_indices = [ki, kj, kk, ka, kb, kc]
                eabcijk = (eijk[None, None, None, :, :, :] +
                           eabc[:, :, :, None, None, None])

                tmp_t3Tv_ijk = my_permuted_w[ki, kj, kk]
                tmp_t3Tv_jik = my_permuted_w[kj, ki, kk]
                tmp_t3Tv_kji = my_permuted_w[kk, kj, ki]
                Ptmp_t3Tv = add_and_permute(
                    kpt_indices, task,
                    (tmp_t3Tv_ijk, tmp_t3Tv_jik, tmp_t3Tv_kji))
                Ptmp_t3Tv /= eabcijk

                # Contribution to T1 amplitudes
                if ki == ka and kc == kconserv[kj, kb, kk]:
                    eris_Soovv = (
                        2. * eris.oovv[kj, kk, kb, :, :, b0:b1, c0:c1] -
                        eris.oovv[kj, kk, kc, :, :, c0:c1, b0:b1].transpose(
                            0, 1, 3, 2))
                    pt1[ka, :, a0:a1] += 0.5 * einsum('abcijk,jkbc->ia',
                                                      Ptmp_t3Tv, eris_Soovv)

                # Contribution to T2 amplitudes
                if ki == ka and kc == kconserv[kj, kb, kk]:
                    tmp = einsum('abcijk,ia->jkbc', Ptmp_t3Tv,
                                 0.5 * fov[ki, :, a0:a1])
                    _add_pt2(pt2, nkpts, kconserv, [kj, kk, kb],
                             [None, None, (b0, b1), (c0, c1)], tmp)

                kd = kconserv[ka, ki, kb]
                eris_vovv = eris.vovv[kd, ki, kb, :, :, b0:b1, a0:a1]
                tmp = einsum('abcijk,diba->jkdc', Ptmp_t3Tv, eris_vovv)
                _add_pt2(pt2, nkpts, kconserv, [kj, kk, kd],
                         [None, None, None, (c0, c1)], tmp)

                km = kconserv[kc, kk, kb]
                eris_ooov = eris.ooov[kj, ki, km, :, :, :, a0:a1]
                tmp = einsum('abcijk,jima->mkbc', Ptmp_t3Tv, eris_ooov)
                _add_pt2(pt2, nkpts, kconserv, [km, kk, kb],
                         [None, None, (b0, b1), (c0, c1)], -1. * tmp)

                # Contribution to Wovoo array
                km = kconserv[ka, ki, kc]
                eris_oovv = eris.oovv[km, ki, kc, :, :, c0:c1, a0:a1]
                tmp = einsum('abcijk,mica->mbkj', Ptmp_t3Tv, eris_oovv)
                Wmcik[km, kb, kk, :, b0:b1, :, :] += tmp

                # Contribution to Wvvoo array
                ke = kconserv[ki, ka, kk]
                eris_oovv = eris.oovv[ki, kk, ka, :, :, a0:a1, :]
                tmp = einsum('abcijk,ikae->cbej', Ptmp_t3Tv, eris_oovv)
                Wacek[kc, kb, ke, c0:c1, b0:b1, :, :] -= tmp

            logger.timer_debug1(
                mycc, 'EOM-CCSD T3[2] ka,kb,vir=(%d,%d,%d/%d) [total=%d]' %
                (ka, kb, task_id, len(tasks), nkpts**5), *cput2)

    # Divide the singles correction by e(i) - e(a).  Entries outside the
    # index sets returned by padding_k_idx keep the LARGE_DENOM placeholder.
    for ki in range(nkpts):
        ka = ki
        eia = LARGE_DENOM * np.ones(
            (nocc, nvir), dtype=eris.mo_energy[0].dtype)
        n0_ovp_ia = np.ix_(nonzero_opadding[ki], nonzero_vpadding[ka])
        eia[n0_ovp_ia] = (mo_e_o[ki][:, None] - mo_e_v[ka])[n0_ovp_ia]
        pt1[ki] /= eia

    # Divide the doubles correction by e(i) - e(a) + e(j) - e(b), with kb
    # fixed by kconserv[ki, ka, kj].
    for ki, ka in product(range(nkpts), repeat=2):
        eia = LARGE_DENOM * np.ones(
            (nocc, nvir), dtype=eris.mo_energy[0].dtype)
        n0_ovp_ia = np.ix_(nonzero_opadding[ki], nonzero_vpadding[ka])
        eia[n0_ovp_ia] = (mo_e_o[ki][:, None] - mo_e_v[ka])[n0_ovp_ia]
        for kj in range(nkpts):
            kb = kconserv[ki, ka, kj]
            ejb = LARGE_DENOM * np.ones(
                (nocc, nvir), dtype=eris.mo_energy[0].dtype)
            n0_ovp_jb = np.ix_(nonzero_opadding[kj], nonzero_vpadding[kb])
            ejb[n0_ovp_jb] = (mo_e_o[kj][:, None] - mo_e_v[kb])[n0_ovp_jb]
            eijab = eia[:, None, :, None] + ejb[:, None, :]
            pt2[ki, kj, ka] /= eijab

    # Corrected amplitudes = correction + input amplitudes.
    pt1 += t1
    pt2 += t2

    logger.timer(mycc, 'EOM-CCSD(T) imds', *cpu0)

    delta_ccsd_energy = mycc.energy(pt1, pt2, eris) - ccsd_energy
    logger.info(mycc, 'CCSD energy T3[2] correction : %16.12e',
                delta_ccsd_energy)

    return delta_ccsd_energy, pt1, pt2, Wmcik, Wacek
def get_k_kpts(mydf,
               dm_kpts,
               hermi=1,
               kpts=np.zeros((1, 3)),
               kpts_band=None,
               exxdiv=None):
    '''Get the exchange (K) AO matrices at sampled k-points.

    Only the exchange matrices are built here; no Coulomb (J) matrix is
    computed or returned by this function.

    Args:
        dm_kpts : (nkpts, nao, nao) ndarray
            Density matrix at each k-point.  If it carries a non-None
            `mo_coeff` attribute (together with `mo_occ`) and describes a
            single density-matrix set, the occupied orbitals scaled by
            sqrt(occupation) are contracted instead of the AO density
            matrix.
        kpts : (nkpts, 3) ndarray

    Kwargs:
        hermi : int
            Whether K matrix is hermitian

            | 0 : not hermitian and not symmetric
            | 1 : hermitian

            (Not referenced in the body of this function.)
        kpts_band : (3,) ndarray or (*,3) ndarray
            A list of arbitrary "band" k-points at which to evaluate the
            matrix.
        exxdiv : str or None
            'ewald' and None both evaluate the Coulomb kernel with the
            divergence treatment switched off (False is passed to
            `tools.get_coulG`); for 'ewald' the G=0 term is added back by
            `_ewald_exxdiv_for_G0` after the FFT loop.  Any other value is
            forwarded to `tools.get_coulG` unchanged.

    Returns:
        vk : the exchange matrices as formatted by `_format_jks`
        (presumably (nkpts, nao, nao), or one such array per input density
        matrix when `dm_kpts` holds several -- confirm against
        `_format_jks`).
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    # These coordinates are only used to obtain the number of grid points;
    # `coords` is rebound to mydf.grids.coords further down.
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if getattr(dm_kpts, 'mo_coeff', None) is not None:
        mo_coeff = dm_kpts.mo_coeff
        mo_occ = dm_kpts.mo_occ
    else:
        mo_coeff = None

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    # Keep the density-matrix dtype only when every k-point is the gamma
    # point; otherwise the result is complex.
    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype)
    else:
        vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128)

    coords = mydf.grids.coords
    ao2_kpts = [
        np.asarray(ao.T, order='C')
        for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)
    ]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [
            np.asarray(ao.T, order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)
        ]
    if mo_coeff is not None and nset == 1:
        # Replace the AO values on the "ket" side by occupied orbitals
        # scaled with sqrt(occupation).
        mo_coeff = [
            mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
            for k, occ in enumerate(mo_occ)
        ]
        ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)]

    mem_now = lib.current_memory()[0]
    # Memory (MB) still available.  The current usage is subtracted exactly
    # once; subtracting it a second time when sizing the block needlessly
    # shrank blksize.
    max_memory = mydf.max_memory - mem_now
    blksize = int(
        min(nao, max(1, max_memory * 1e6 / 16 / 4 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s blksize %d',
                  max_memory, blksize)
    #ao1_dtype = np.result_type(*ao1_kpts)
    #ao2_dtype = np.result_type(*ao2_kpts)
    vR_dm = np.empty((nset, nao, ngrids), dtype=vk_kpts.dtype)

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near the
            # end of the function to bypass any discretization errors
            # that arise from the FFT.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh)
            else:
                coulG = tools.get_coulG(cell, kpt2 - kpt1, exxdiv, mydf, mesh)
            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            # Process blksize rows of the first AO index at a time to bound
            # the size of the pair-density intermediates.
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr,
                                 ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1 - p0, naoj, ngrids)
                vG = None
                if vR_dm.dtype == np.double:
                    vR = vR.real
                for i in range(nset):
                    np.einsum('ijg,jg->ig', vR, ao_dms[i],
                              out=vR_dm[i, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for i in range(nset):
                vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T)
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2,
                                 *t1)

    # Function _ewald_exxdiv_for_G0 to add back in the G=0 component to vk_kpts
    # Note in the _ewald_exxdiv_for_G0 implementation, the G=0 treatments are
    # different for 1D/2D and 3D systems.  The special treatments for 1D and 2D
    # can only be used with AFTDF/GDF/MDF method.  In the FFTDF method, 1D, 2D
    # and 3D should use the ewald probe charge correction.
    if exxdiv == 'ewald':
        _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)

    return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def add_wvvVV_(self, t1, t2, eris, t2new_tril, with_ovvv=False):
    """Add the vvvv contraction to `t2new_tril` using the `eris.vvL` tensor.

    Reference expressions (from the original inline notes):

        tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)

    The vvvv integrals are never held in full: blocks are assembled on the
    fly as products of rows of `eris.vvL` and contracted immediately.

    Args:
        t1: singles amplitudes; `t1.shape` is unpacked as (nocc, nvir).
        t2: doubles amplitudes, read as `t2[i, :i+1]` for each occupied i.
        eris: integral container providing `naux` and `vvL`.
        t2new_tril: output array over i >= j occupied pairs; the helper
            `_dgemm` calls accumulate into it (see the `+=` einsum notes
            next to each call).
        with_ovvv: accepted for interface compatibility; not referenced in
            this implementation.

    Returns:
        `t2new_tril` (the same object that was passed in).

    Raises:
        AssertionError: if `self.direct` is truthy.
    """
    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    assert (not self.direct)
    # time.clock() no longer exists on Python >= 3.8; use the clock helpers
    # of the logger module, whose timer_debug1 consumes these values.
    time0 = logger.process_clock(), logger.perf_counter()

    # Both helpers read `nocc` from the enclosing scope; it is assigned
    # below, before either helper is called.
    def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
        # Off-diagonal block (i0:i1, j0:j1): two contractions, one for the
        # block itself and one for its transpose.
        nao = tau.shape[-1]
        ic = i1 - i0
        jc = j1 - j0
        #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
        _dgemm('N', 'N', nocc * (nocc + 1) // 2, jc * nao, ic * nao,
               tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
               t2new_tril.reshape(-1, nao * nao), 1, 1, i0 * nao, 0, j0 * nao)

        #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
        _dgemm('N', 'T', nocc * (nocc + 1) // 2, ic * nao, jc * nao,
               tau.reshape(-1, nao * nao), eri.reshape(-1, jc * nao),
               t2new_tril.reshape(-1, nao * nao), 1, 1, j0 * nao, 0, i0 * nao)

    def contract_tril_(t2new_tril, tau, eri, a0, a):
        # Diagonal block: row `a` against the rows a0..a.
        nvir = tau.shape[-1]
        #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
        _dgemm('N', 'N', nocc * (nocc + 1) // 2, nvir, (a + 1 - a0) * nvir,
               tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
               t2new_tril.reshape(-1, nvir * nvir), 1, 1, a0 * nvir, 0,
               a * nvir)

        #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
        if a > a0:
            _dgemm('N', 'T', nocc * (nocc + 1) // 2, (a - a0) * nvir, nvir,
                   tau.reshape(-1, nvir * nvir), eri.reshape(-1, nvir),
                   t2new_tril.reshape(-1, nvir * nvir), 1, 1, a * nvir, 0,
                   a0 * nvir)

    nocc, nvir = t1.shape
    nvir_pair = nvir * (nvir + 1) // 2
    naux = eris.naux

    # tau over the packed i >= j occupied pairs.
    tau = numpy.empty((nocc * (nocc + 1) // 2, nvir, nvir))
    p0 = 0
    for i in range(nocc):
        tau[p0:p0 + i + 1] = numpy.einsum('a,jb->jab', t1[i], t1[:i + 1])
        tau[p0:p0 + i + 1] += t2[i, :i + 1]
        p0 += i + 1
    time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

    #TODO: check if vvL can be entirely load into memory
    max_memory = max(2000, self.max_memory - lib.current_memory()[0])
    # dmax: block edge along the virtual index; vvblk: number of vvL rows
    # loaded per product.  Both are derived from the memory budget (MB).
    dmax = max(4, numpy.sqrt(max_memory * .9e6 / 8 / nvir**2 / 2))
    vvblk = max(4, (max_memory * 1e6 / 8 - dmax**2 *
                    (nvir**2 * 1.5 + naux)) / naux)
    dmax = int(dmax)
    vvblk = int(vvblk)
    # Work buffers reused for every block.
    eribuf = numpy.empty((dmax, dmax, nvir_pair))
    loadbuf = numpy.empty((dmax, dmax, nvir, nvir))

    for i0, i1 in lib.prange(0, nvir, dmax):
        di = i1 - i0
        # Off-diagonal blocks with j-range strictly below i0.
        for j0, j1 in lib.prange(0, i0, dmax):
            dj = j1 - j0

            ijL = numpy.empty((di, dj, naux))
            for i in range(i0, i1):
                ioff = i * (i + 1) // 2
                ijL[i - i0] = eris.vvL[ioff + j0:ioff + j1]
            ijL = ijL.reshape(-1, naux)
            eri = numpy.ndarray(((i1 - i0) * (j1 - j0), nvir_pair),
                                buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL = numpy.asarray(eris.vvL[p0:p1])
                eri[:, p0:p1] = lib.ddot(ijL, vvL.T)
                vvL = None

            tmp = numpy.ndarray((i1 - i0, nvir, j1 - j0, nvir),
                                buffer=loadbuf)
            _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                   eri.ctypes.data_as(ctypes.c_void_p),
                                   (ctypes.c_int * 4)(i0, i1, j0, j1),
                                   ctypes.c_int(nvir))
            contract_rec_(t2new_tril, tau, tmp, i0, i1, j0, j1)
            time0 = logger.timer_debug1(
                self, 'vvvv [%d:%d,%d:%d]' % (i0, i1, j0, j1), *time0)

        # Diagonal block (i0:i1, i0:i1): only pairs with second index <= i.
        ijL = []
        for i in range(i0, i1):
            ioff = i * (i + 1) // 2
            ijL.append(eris.vvL[ioff + i0:ioff + i + 1])
        ijL = numpy.vstack(ijL).reshape(-1, naux)
        eri = numpy.ndarray((di * (di + 1) // 2, nvir_pair), buffer=eribuf)
        for p0, p1 in lib.prange(0, nvir_pair, vvblk):
            vvL = numpy.asarray(eris.vvL[p0:p1])
            eri[:, p0:p1] = lib.ddot(ijL, vvL.T)
            vvL = None
        for i in range(di):
            p0, p1 = i * (i + 1) // 2, (i + 1) * (i + 2) // 2
            tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
            contract_tril_(t2new_tril, tau, tmp, i0, i0 + i)
        time0 = logger.timer_debug1(
            self, 'vvvv [%d:%d,%d:%d]' % (i0, i1, i0, i1), *time0)
    eribuf = loadbuf = eri = tmp = None
    return t2new_tril
def get_jk_favork(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    """Build J and K matrices by batched quadrature over `sgx.grids`.

    Args:
        sgx: object providing `mol`, `grids`, `grids_thrd`, `debug`,
            `max_memory`, `blockdim` and `_opt`.
        dm: one density matrix or a stack of them; the last axis gives
            `nao` and the input is viewed as (-1, nao, nao).
        hermi: when equal to 1 the final K matrices are symmetrized.
        with_j: accumulate the J matrices.
        with_k: accumulate the K matrices.
        direct_scf_tol: forwarded to `_gen_jk_direct` (unused when
            `sgx.debug` is set, where `_gen_batch_nuc` is used instead).

    Returns:
        Tuple `(vj, vk)`, each reshaped to the shape of `dm`.  A matrix
        whose flag (`with_j` / `with_k`) is False is returned as zeros.
    """
    # time.clock() no longer exists on Python >= 3.8; use the clock helpers
    # of the logger module so the values match what logger.timer* returns.
    t0 = logger.process_clock(), logger.perf_counter()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)
    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)

    # sn accumulates the numerically integrated overlap on the full grid.
    sn = numpy.zeros((nao,nao))
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)

    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
    # (CPU, wall) time spent inside the integral kernels, accumulated by
    # subtracting the clock before each call and adding it back afterwards.
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1,None]
        sn += lib.dot(ao.T, wao)

        fg = lib.einsum('gi,xij->xgj', wao, dms)
        # Keep only grid points where some |fg| entry exceeds the threshold
        # for at least one density matrix.  The overlap above is accumulated
        # before this screening.
        mask = numpy.zeros(i1-i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i]>gthrd, axis=1)
            mask |= numpy.any(fg[i]<-gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:,mask]
            coords = coords[mask]

        if sgx.debug:
            tnuc = (tnuc[0] - logger.process_clock(),
                    tnuc[1] - logger.perf_counter())
            gbn = batch_nuc(mol, coords)
            tnuc = (tnuc[0] + logger.process_clock(),
                    tnuc[1] + logger.perf_counter())
            if with_j:
                jg = numpy.einsum('gij,xij->xg', gbn, dms)
            if with_k:
                gv = lib.einsum('gvt,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = (tnuc[0] - logger.process_clock(),
                    tnuc[1] - logger.perf_counter())
            jg, gv = batch_jk(mol, coords, dms, fg.copy())
            tnuc = (tnuc[0] + logger.process_clock(),
                    tnuc[1] + logger.perf_counter())

        if with_j:
            xj = lib.einsum('gv,xg->xgv', ao, jg)
            for i in range(nset):
                vj[i] += lib.einsum('gu,gv->uv', wao, xj[i])
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        jg = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    # Time of the builder loop not spent in the integral kernels.
    tdot = t2[0] - t1[0] - tnuc[0] , t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])

    # Apply proj = sn^{-1} ovlp (numerical overlap vs. the int1e_ovlp
    # overlap) to the accumulated matrices.
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)

    if with_j:
        vj = lib.einsum('pi,xpj->xij', proj, vj)
        vj = (vj + vj.transpose(0,2,1))*.5
    if with_k:
        vk = lib.einsum('pi,xpj->xij', proj, vk)
        if hermi == 1:
            vk = (vk + vk.transpose(0,2,1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def _make_eris_outcore(mycc, mo_coeff=None):
    """Transform the two-electron integrals and store them in an HDF5 file.

    Datasets are created in a temporary HDF5 file (`eris.feri`) and filled
    one occupied index at a time.  Lower-case letters label the first
    orbital set (`eris.mo_coeff[0]`), upper-case letters the second
    (`eris.mo_coeff[1]`); 'o'/'v' denote the orbitals below / from
    `nocca` (resp. `noccb`).

    Args:
        mycc: coupled-cluster object providing `nocc` and `nmo` (each
            unpacked as a pair), `mol` and `direct`.
        mo_coeff: forwarded to `eris._common_init_`.

    Returns:
        The populated `_ChemistsERIs` object.  The `vvvv`, `VVVV` and
        `vvVV` datasets are only created when `mycc.direct` is falsy.
    """
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    nocca, noccb = mycc.nocc
    nmoa, nmob = mycc.nmo
    nvira, nvirb = nmoa-nocca, nmob-noccb

    moa = eris.mo_coeff[0]
    mob = eris.mo_coeff[1]
    # From here on the orbital counts are taken from the coefficient
    # matrices actually stored on eris.
    nmoa = moa.shape[1]
    nmob = mob.shape[1]

    orboa = moa[:,:nocca]
    orbob = mob[:,:noccb]
    orbva = moa[:,nocca:]
    orbvb = mob[:,noccb:]
    eris.feri = lib.H5TmpFile()
    eris.oooo = eris.feri.create_dataset('oooo', (nocca,nocca,nocca,nocca), 'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocca,nvira,nocca,nocca), 'f8')
    eris.ovov = eris.feri.create_dataset('ovov', (nocca,nvira,nocca,nvira), 'f8')
    eris.oovv = eris.feri.create_dataset('oovv', (nocca,nocca,nvira,nvira), 'f8')
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocca,nvira,nvira,nocca), 'f8')
    # The *vv blocks with three virtual-type indices keep the last pair in
    # packed lower-triangular form.
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocca,nvira,nvira*(nvira+1)//2), 'f8')
    #eris.vvvv = eris.feri.create_dataset('vvvv', (nvira,nvira,nvira,nvira), 'f8')
    eris.OOOO = eris.feri.create_dataset('OOOO', (noccb,noccb,noccb,noccb), 'f8')
    eris.OVOO = eris.feri.create_dataset('OVOO', (noccb,nvirb,noccb,noccb), 'f8')
    eris.OVOV = eris.feri.create_dataset('OVOV', (noccb,nvirb,noccb,nvirb), 'f8')
    eris.OOVV = eris.feri.create_dataset('OOVV', (noccb,noccb,nvirb,nvirb), 'f8')
    eris.OVVO = eris.feri.create_dataset('OVVO', (noccb,nvirb,nvirb,noccb), 'f8')
    eris.OVVV = eris.feri.create_dataset('OVVV', (noccb,nvirb,nvirb*(nvirb+1)//2), 'f8')
    #eris.VVVV = eris.feri.create_dataset('VVVV', (nvirb,nvirb,nvirb,nvirb), 'f8')
    eris.ooOO = eris.feri.create_dataset('ooOO', (nocca,nocca,noccb,noccb), 'f8')
    eris.ovOO = eris.feri.create_dataset('ovOO', (nocca,nvira,noccb,noccb), 'f8')
    eris.ovOV = eris.feri.create_dataset('ovOV', (nocca,nvira,noccb,nvirb), 'f8')
    eris.ooVV = eris.feri.create_dataset('ooVV', (nocca,nocca,nvirb,nvirb), 'f8')
    eris.ovVO = eris.feri.create_dataset('ovVO', (nocca,nvira,nvirb,noccb), 'f8')
    eris.ovVV = eris.feri.create_dataset('ovVV', (nocca,nvira,nvirb*(nvirb+1)//2), 'f8')
    #eris.vvVV = eris.feri.create_dataset('vvVV', (nvira,nvira,nvirb,nvirb), 'f8')
    eris.OVoo = eris.feri.create_dataset('OVoo', (noccb,nvirb,nocca,nocca), 'f8')
    eris.OOvv = eris.feri.create_dataset('OOvv', (noccb,noccb,nvira,nvira), 'f8')
    eris.OVvo = eris.feri.create_dataset('OVvo', (noccb,nvirb,nvira,nocca), 'f8')
    eris.OVvv = eris.feri.create_dataset('OVvv', (noccb,nvirb,nvira*(nvira+1)//2), 'f8')

    # time.clock() no longer exists on Python >= 3.8; use the clock helpers
    # of the logger module, whose timer_debug1 consumes these values.
    cput1 = logger.process_clock(), logger.perf_counter()
    mol = mycc.mol
    # <ij||pq> = <ij|pq> - <ij|qp> = (ip|jq) - (iq|jp)
    # Scratch file for the half-packed (o,p|q,r) integrals of each spin
    # combination; every entry is deleted once it has been sliced.
    tmpf = lib.H5TmpFile()
    if nocca > 0:
        ao2mo.general(mol, (orboa,moa,moa,moa), tmpf, 'aa')
        buf = np.empty((nmoa,nmoa,nmoa))
        for i in range(nocca):
            # Rows i*nmoa:(i+1)*nmoa hold all (i p|..) pairs for occupied i.
            lib.unpack_tril(tmpf['aa'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.oooo[i] = buf[:nocca,:nocca,:nocca]
            eris.ovoo[i] = buf[nocca:,:nocca,:nocca]
            eris.ovov[i] = buf[nocca:,:nocca,nocca:]
            eris.oovv[i] = buf[:nocca,nocca:,nocca:]
            eris.ovvo[i] = buf[nocca:,nocca:,:nocca]
            eris.ovvv[i] = lib.pack_tril(buf[nocca:,nocca:,nocca:])
        del(tmpf['aa'])

    if noccb > 0:
        buf = np.empty((nmob,nmob,nmob))
        ao2mo.general(mol, (orbob,mob,mob,mob), tmpf, 'bb')
        for i in range(noccb):
            lib.unpack_tril(tmpf['bb'][i*nmob:(i+1)*nmob], out=buf)
            eris.OOOO[i] = buf[:noccb,:noccb,:noccb]
            eris.OVOO[i] = buf[noccb:,:noccb,:noccb]
            eris.OVOV[i] = buf[noccb:,:noccb,noccb:]
            eris.OOVV[i] = buf[:noccb,noccb:,noccb:]
            eris.OVVO[i] = buf[noccb:,noccb:,:noccb]
            eris.OVVV[i] = lib.pack_tril(buf[noccb:,noccb:,noccb:])
        del(tmpf['bb'])

    if nocca > 0:
        buf = np.empty((nmoa,nmob,nmob))
        ao2mo.general(mol, (orboa,moa,mob,mob), tmpf, 'ab')
        for i in range(nocca):
            lib.unpack_tril(tmpf['ab'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.ooOO[i] = buf[:nocca,:noccb,:noccb]
            eris.ovOO[i] = buf[nocca:,:noccb,:noccb]
            eris.ovOV[i] = buf[nocca:,:noccb,noccb:]
            eris.ooVV[i] = buf[:nocca,noccb:,noccb:]
            eris.ovVO[i] = buf[nocca:,noccb:,:noccb]
            eris.ovVV[i] = lib.pack_tril(buf[nocca:,noccb:,noccb:])
        del(tmpf['ab'])

    if noccb > 0:
        buf = np.empty((nmob,nmoa,nmoa))
        ao2mo.general(mol, (orbob,mob,moa,moa), tmpf, 'ba')
        for i in range(noccb):
            lib.unpack_tril(tmpf['ba'][i*nmob:(i+1)*nmob], out=buf)
            eris.OVoo[i] = buf[noccb:,:nocca,:nocca]
            eris.OOvv[i] = buf[:noccb,nocca:,nocca:]
            eris.OVvo[i] = buf[noccb:,nocca:,:nocca]
            eris.OVvv[i] = lib.pack_tril(buf[noccb:,nocca:,nocca:])
        del(tmpf['ba'])
    buf = None
    cput1 = logger.timer_debug1(mycc, 'transforming oopq, ovpq', *cput1)

    if not mycc.direct:
        # The all-virtual blocks are written straight into eris.feri.
        ao2mo.full(mol, orbva, eris.feri, dataname='vvvv')
        ao2mo.full(mol, orbvb, eris.feri, dataname='VVVV')
        ao2mo.general(mol, (orbva,orbva,orbvb,orbvb), eris.feri, dataname='vvVV')
        eris.vvvv = eris.feri['vvvv']
        eris.VVVV = eris.feri['VVVV']
        eris.vvVV = eris.feri['vvVV']
        cput1 = logger.timer_debug1(mycc, 'transforming vvvv', *cput1)

    return eris
def get_jk(mf_grad, mol=None, dm=None, hermi=0, with_j=True, with_k=True,
           ishf=True):
    """Density-fitted derivative J and K matrices for nuclear gradients.

    Args:
        mf_grad: gradient object providing `mol`, `base` (with `with_df`,
            `make_rdm1()`, `mo_coeff`, `mo_occ`), `max_memory` and
            `auxbasis_response`.
        mol: molecule; defaults to `mf_grad.mol`.
        dm: one density matrix or a stack of them; defaults to
            `mf_grad.base.make_rdm1()`.  If it carries `mo_coeff` and
            `mo_occ` attributes, those orbitals are used for the exchange
            part.
        hermi: not referenced in this implementation.
        with_j: not referenced in this implementation; the J part is always
            evaluated.
        with_k: when False only the J part is evaluated and `(vj, None)` is
            returned.
        ishf: when True the orbitals are taken from `mf_grad.base` (unless
            `dm` carries its own) and the auxiliary-basis terms are summed
            over the density-matrix sets; when False the orbitals are
            obtained by diagonalising each density matrix and the
            auxiliary-basis terms keep both set indices.

    Returns:
        Tuple `(vj, vk)`.  Each array has the shape of `dm` with an extra
        axis of length 3 inserted before the last two axes.  When
        `mf_grad.auxbasis_response` is truthy the arrays are tagged (via
        `lib.tag_array`) with an `aux` attribute holding the per-slice
        auxiliary-basis contributions.
    """
    # time.clock() no longer exists on Python >= 3.8; use the clock helpers
    # of the logger module so the values match what logger.timer* returns.
    t0 = (logger.process_clock(), logger.perf_counter())
    if mol is None:
        mol = mf_grad.mol
    if dm is None:
        dm = mf_grad.base.make_rdm1()

    with_df = mf_grad.base.with_df
    auxmol = with_df.auxmol
    if auxmol is None:
        auxmol = df.addons.make_auxmol(with_df.mol, with_df.auxbasis)
    # NOTE(review): pmol is not referenced again in this function.
    pmol = mol + auxmol
    ao_loc = mol.ao_loc
    nbas = mol.nbas
    nauxbas = auxmol.nbas

    get_int3c_s1 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's1')
    get_int3c_s2 = _int3c_wrapper(mol, auxmol, 'int3c2e', 's2ij')
    get_int3c_ip1 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip1', 's1')
    get_int3c_ip2 = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')

    nao = mol.nao
    naux = auxmol.nao
    dms = numpy.asarray(dm)
    out_shape = dms.shape[:-2] + (3, ) + dms.shape[-2:]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    # Packed lower triangle of dm + dm^T, with the diagonal entries
    # (positions i*(i+1)/2 + i) halved back.
    idx = numpy.arange(nao)
    idx = idx * (idx + 1) // 2 + idx
    dm_tril = dms + dms.transpose(0, 2, 1)
    dm_tril = lib.pack_tril(dm_tril)
    dm_tril[:, idx] *= .5

    auxslices = auxmol.aoslice_by_atom()
    aux_loc = auxmol.ao_loc
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = int(min(max(max_memory * .5e6 / 8 / (nao**2 * 3), 20), naux,
                      240))
    ao_ranges = balance_partition(aux_loc, blksize)

    if not with_k:

        # (i,j|P)
        rhoj = numpy.empty((nset, naux))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_s2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            rhoj[:, p0:p1] = lib.einsum('wp,nw->np', int3c, dm_tril)
            int3c = None

        # (P|Q)
        int2c = auxmol.intor('int2c2e', aosym='s1')
        # assume_a='pos' replaces the former sym_pos=True keyword, which
        # recent SciPy releases no longer accept.
        rhoj = scipy.linalg.solve(int2c, rhoj.T, assume_a='pos').T
        int2c = None

        # (d/dX i,j|P)
        vj = numpy.zeros((nset, 3, nao, nao))
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            vj += lib.einsum('xijp,np->nxij', int3c, rhoj[:, p0:p1])
            int3c = None

        if mf_grad.auxbasis_response:
            # (i,j|d/dX P)
            vjaux = numpy.empty((nset, nset, 3, naux))
            for shl0, shl1, nL in ao_ranges:
                int3c = get_int3c_ip2(
                    (0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
                p0, p1 = aux_loc[shl0], aux_loc[shl1]
                vjaux[:, :, :, p0:p1] = lib.einsum('xwp,mw,np->mnxp', int3c,
                                                   dm_tril, rhoj[:, p0:p1])
                int3c = None

            # (d/dX P|Q)
            int2c_e1 = auxmol.intor('int2c2e_ip1', aosym='s1')
            vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)

            # Sum the auxiliary functions within each auxslices range.
            vjaux = numpy.array([
                -vjaux[:, :, :, p0:p1].sum(axis=3)
                for p0, p1 in auxslices[:, 2:]
            ])
            if ishf:
                vjaux = vjaux.sum((1, 2))
            else:
                vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vj = lib.tag_array(-vj.reshape(out_shape),
                               aux=numpy.array(vjaux))
        else:
            vj = -vj.reshape(out_shape)
        logger.timer(mf_grad, 'df vj', *t0)
        return vj, None

    # Orbitals and occupations used to factorise the density matrices.
    if hasattr(dm, 'mo_coeff') and hasattr(dm, 'mo_occ'):
        mo_coeff = dm.mo_coeff
        mo_occ = dm.mo_occ
    elif ishf:
        mo_coeff = mf_grad.base.mo_coeff
        mo_occ = mf_grad.base.mo_occ
        if isinstance(mf_grad.base, scf.rohf.ROHF):
            # Duplicate the orbitals and split the occupations into a
            # "> 0" set and a "== 2" set.
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
            assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))
    else:
        # Generalised eigen-decomposition of S.dm.S with metric S for each
        # density matrix.
        s0 = mol.intor('int1e_ovlp')
        mo_occ = []
        mo_coeff = []
        for dm in dms:
            sdms = reduce(lib.dot, (s0, dm, s0))
            n, c = scipy.linalg.eigh(sdms, b=s0)
            mo_occ.append(n)
            mo_coeff.append(c)
        mo_occ = numpy.stack(mo_occ, axis=0)
    nmo = mo_occ.shape[-1]

    mo_coeff = numpy.asarray(mo_coeff).reshape(-1, nao, nmo)
    mo_occ = numpy.asarray(mo_occ).reshape(-1, nmo)
    rhoj = numpy.zeros((nset, naux))
    f_rhok = lib.H5TmpFile()
    # orbol[i]: orbitals of set i with |occupation| > 1e-8;
    # orbor[i]: the same columns scaled by their occupations.
    orbor = []
    orbol = []
    nocc = []
    orbor_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    orbol_stack = numpy.zeros((nao, 0), dtype=mo_coeff.dtype, order='F')
    offs = 0
    for i in range(nset):
        idx = numpy.abs(mo_occ[i]) > 1e-8
        nocc.append(numpy.count_nonzero(idx))
        c = mo_coeff[i][:, idx]
        orbol_stack = numpy.append(orbol_stack, c, axis=1)
        orbol.append(orbol_stack[:, offs:offs + nocc[-1]])
        cn = lib.einsum('pi,i->pi', c, mo_occ[i][idx])
        orbor_stack = numpy.append(orbor_stack, cn, axis=1)
        orbor.append(orbor_stack[:, offs:offs + nocc[-1]])
        offs += nocc[-1]

    # (P|Q)
    int2c = scipy.linalg.cho_factor(auxmol.intor('int2c2e', aosym='s1'))

    t1 = (logger.process_clock(), logger.perf_counter())
    max_memory = mf_grad.max_memory - lib.current_memory()[0]
    blksize = max_memory * .5e6 / 8 / (naux * nao)
    mol_ao_ranges = balance_partition(ao_loc, blksize)
    nsteps = len(mol_ao_ranges)
    t2 = t1
    for istep, (shl0, shl1, nd) in enumerate(mol_ao_ranges):
        int3c = get_int3c_s1((0, nbas, shl0, shl1, 0, nauxbas))
        t2 = logger.timer_debug1(mf_grad, 'df grad intor (P|mn)', *t2)
        p0, p1 = ao_loc[shl0], ao_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            v = lib.dot(int3c.reshape(nao, -1, order='F').T,
                        orbor[i]).reshape(naux, (p1 - p0) * nocc[i])
            t2 = logger.timer_debug1(
                mf_grad, 'df grad einsum (P|mn) u_ni N_i = v_Pmi', *t2)
            rhoj[i] += numpy.dot(v, orbol[i][p0:p1].ravel())
            t2 = logger.timer_debug1(
                mf_grad, 'df grad einsum v_Pmi u_mi = rho_P', *t2)
            v = scipy.linalg.cho_solve(int2c, v)
            t2 = logger.timer_debug1(
                mf_grad, 'df grad cho_solve (P|Q) D_Qmi = v_Pmi', *t2)
            # Cached per (set, AO batch); reassembled by load() below.
            f_rhok['%s/%s' % (i, istep)] = v.reshape(naux, p1 - p0, -1)
            t2 = logger.timer_debug1(
                mf_grad,
                'df grad cache D_Pmi (m <-> i transpose upon retrieval)',
                *t2)
        int3c = v = None

    rhoj = scipy.linalg.cho_solve(int2c, rhoj.T).T
    int2c = None
    t1 = logger.timer_debug1(
        mf_grad, 'df grad vj and vk AO (P|Q) D_Q = (P|mn) D_mn solve', *t1)

    def load(set_id, p0, p1):
        # Gather rows p0:p1 of the cached intermediate for one set from
        # all AO batches, swapping the last two axes of each piece.
        buf = numpy.empty((p1 - p0, nocc[set_id], nao))
        col1 = 0
        for istep in range(nsteps):
            dat = f_rhok['%s/%s' % (set_id, istep)][p0:p1]
            col0, col1 = col1, col1 + dat.shape[1]
            buf[:p1 - p0, :, col0:col1] = dat.transpose(0, 2, 1)
        return buf

    vj = numpy.zeros((nset, 3, nao, nao))
    vk = numpy.zeros((nset, 3, nao, nao))
    # (d/dX i,j|P)
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s1  # MO output index slower than AO output index; input AOs are asymmetric
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s1  # input is not tril_packed
    null = lib.c_null_ptr()
    t2 = t1
    for shl0, shl1, nL in ao_ranges:
        int3c = get_int3c_ip1((0, nbas, 0, nbas, shl0, shl1)).transpose(
            0, 3, 2, 1)  # (P|mn'), row-major order
        t2 = logger.timer_debug1(mf_grad, "df grad intor (P|mn')", *t2)
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        for i in range(nset):
            # MRH 05/21/2020: De-vectorize this because array contiguity -> parallel scaling
            vj[i, 0] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[0].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            vj[i, 1] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[1].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            vj[i, 2] += numpy.dot(rhoj[i, p0:p1],
                                  int3c[2].reshape(p1 - p0,
                                                   -1)).reshape(nao, nao).T
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum rho_P (P|mn') rho_P", *t2)
            tmp = numpy.empty((3, p1 - p0, nocc[i], nao),
                              dtype=orbol_stack.dtype)
            fdrv(
                ftrans,
                fmmm,  # xPmn u_mi -> xPin
                tmp.ctypes.data_as(ctypes.c_void_p),
                int3c.ctypes.data_as(ctypes.c_void_p),
                orbol[i].ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(3 * (p1 - p0)),
                ctypes.c_int(nao),
                (ctypes.c_int * 4)(0, nocc[i], 0, nao),
                null,
                ctypes.c_int(0))
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum (P|mn') u_mi = dg_Pin", *t2)
            rhok = load(i, p0, p1)
            vk[i] += lib.einsum('xpoi,pok->xik', tmp, rhok)
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum D_Pim dg_Pin = v_ij", *t2)
            rhok = tmp = None
        int3c = None
    t1 = logger.timer_debug1(mf_grad,
                             'df grad vj and vk AO (P|mn) D_P eval', *t1)

    if mf_grad.auxbasis_response:
        # Cache (P|uv) D_ui c_vj. Must be include both upper and lower triangles
        # over nset.
        max_memory = mf_grad.max_memory - lib.current_memory()[0]
        blksize = int(
            min(max(max_memory * .5e6 / 8 / (nao * max(nocc)), 20), naux))
        rhok_oo = []
        for i, j in product(range(nset), repeat=2):
            tmp = numpy.empty((naux, nocc[i], nocc[j]))
            for p0, p1 in lib.prange(0, naux, blksize):
                rhok = load(i, p0, p1).reshape((p1 - p0) * nocc[i], nao)
                tmp[p0:p1] = lib.dot(rhok, orbol[j]).reshape(
                    p1 - p0, nocc[i], nocc[j])
            rhok_oo.append(tmp)
        rhok = tmp = None
        t1 = logger.timer_debug1(
            mf_grad, 'df grad vj and vk aux d_Pim u_mj = d_Pij eval', *t1)

        vjaux = numpy.zeros((nset, nset, 3, naux))
        vkaux = numpy.zeros((nset, nset, 3, naux))
        # (i,j|d/dX P)
        t2 = t1
        fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2  # MO output index slower than AO output index; input AOs are symmetric
        fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv  # comp and aux indices are slower
        ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2  # input is tril_packed
        null = lib.c_null_ptr()
        for shl0, shl1, nL in ao_ranges:
            int3c = get_int3c_ip2((0, nbas, 0, nbas, shl0, shl1))  # (i,j|P)
            t2 = logger.timer_debug1(mf_grad, "df grad intor (P'|mn)", *t2)
            p0, p1 = aux_loc[shl0], aux_loc[shl1]
            drhoj = lib.dot(
                int3c.transpose(0, 2, 1).reshape(3 * (p1 - p0), -1),
                dm_tril.T).reshape(3, p1 - p0, -1)  # xpij,mij->xpm
            vjaux[:, :, :, p0:p1] = lib.einsum('xpm,np->mnxp', drhoj,
                                               rhoj[:, p0:p1])
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum rho_P (P'|mn) D_mn = v_P", *t2)
            tmp = [
                numpy.empty((3, p1 - p0, nocc_i, nao),
                            dtype=orbor_stack.dtype) for nocc_i in nocc
            ]
            assert (orbor_stack.flags.f_contiguous), '{} {}'.format(
                orbor_stack.shape, orbor_stack.strides)
            for orb, buf, nocc_i in zip(orbol, tmp, nocc):
                fdrv(
                    ftrans,
                    fmmm,  # gPmn u_ni -> gPim
                    buf.ctypes.data_as(ctypes.c_void_p),
                    int3c.ctypes.data_as(ctypes.c_void_p),
                    orb.ctypes.data_as(ctypes.c_void_p),
                    ctypes.c_int(3 * (p1 - p0)),
                    ctypes.c_int(nao),
                    (ctypes.c_int * 4)(0, nocc_i, 0, nao),
                    null,
                    ctypes.c_int(0))
            # int3c is rebound to a nested list indexed [i][j] of
            # (3, p1-p0, nocc[i], nocc[j]) arrays.
            int3c = [[
                lib.dot(buf.reshape(-1, nao),
                        orb).reshape(3, p1 - p0, -1, norb)
                for orb, norb in zip(orbor, nocc)
            ] for buf in tmp]  # pim,mj,j -> pij
            t2 = logger.timer_debug1(
                mf_grad, "df grad einsum (P'|mn) u_mi u_nj N_j = v_Pmn", *t2)
            for i, j in product(range(nset), repeat=2):
                k = (i * nset) + j
                tmp = rhok_oo[k][p0:p1]
                vkaux[i, j, :, p0:p1] += lib.einsum('xpij,pij->xp',
                                                    int3c[i][j], tmp)
                t2 = logger.timer_debug1(
                    mf_grad, "df grad einsum d_Pij v_Pij = v_P", *t2)
            int3c = tmp = None
        t1 = logger.timer_debug1(mf_grad,
                                 "df grad vj and vk aux (P'|mn) eval", *t1)

        # (d/dX P|Q)
        int2c_e1 = auxmol.intor('int2c2e_ip1')
        vjaux -= lib.einsum('xpq,mp,nq->mnxp', int2c_e1, rhoj, rhoj)
        for i, j in product(range(nset), repeat=2):
            k = (i * nset) + j
            l = (j * nset) + i
            tmp = lib.einsum('pij,qji->pq', rhok_oo[k], rhok_oo[l])
            vkaux[i, j] -= lib.einsum('xpq,pq->xp', int2c_e1, tmp)
        t1 = logger.timer_debug1(mf_grad,
                                 "df grad vj and vk aux (P'|Q) eval", *t1)

        # Sum the auxiliary functions within each auxslices range; the new
        # leading axis runs over the rows of auxslices.
        vjaux = numpy.array([
            -vjaux[:, :, :, p0:p1].sum(axis=3)
            for p0, p1 in auxslices[:, 2:]
        ])
        vkaux = numpy.array([
            -vkaux[:, :, :, p0:p1].sum(axis=3)
            for p0, p1 in auxslices[:, 2:]
        ])
        if ishf:
            vjaux = vjaux.sum((1, 2))
            # Flat positions i*(nset+1) pick the i == j set pairs.
            idx = numpy.array(list(range(nset))) * (nset + 1)
            # NOTE(review): vkaux is laid out as
            # (len(auxslices), nset, nset, 3) at this point but is reshaped
            # as (nset**2, 3, natm) -- confirm that this reinterpretation of
            # the axes is what callers expect.
            vkaux = vkaux.reshape(
                (nset**2, 3, mol.natm))[idx, :, :].sum(0)
        else:
            vjaux = numpy.ascontiguousarray(vjaux.transpose(1, 2, 0, 3))
            vkaux = numpy.ascontiguousarray(vkaux.transpose(1, 2, 0, 3))
        vj = lib.tag_array(-vj.reshape(out_shape), aux=numpy.array(vjaux))
        vk = lib.tag_array(-vk.reshape(out_shape), aux=numpy.array(vkaux))
    else:
        vj = -vj.reshape(out_shape)
        vk = -vk.reshape(out_shape)
    logger.timer(mf_grad, 'df grad vj and vk', *t0)
    return vj, vk
def build(self, omega=None, direct_scf_tol=None):
    '''Prepare and cache the short-range / long-range intermediates.

    Args:
        omega : range-separation parameter. When not None it overwrites
            ``self.omega``. If ``self.omega`` is still None afterwards,
            omega, mesh and ke_cutoff are all taken from ``_guess_omega``;
            otherwise ke_cutoff and mesh are derived from ``self.omega``.
        direct_scf_tol : screening threshold. Defaults to
            ``cell.precision**1.5`` when None.

    Returns:
        self. The following attributes are assigned along the way: omega,
        mesh, ke_cutoff, cell_rs, bvk_kmesh, bvkmesh_Ls, bvkcell, phase,
        supmol_Ls, supmol, bvk_bas_mask, bvk_cell_id, cell0_shl_id, vhfopt,
        ovlp_mask and lr_aft.

    Raises:
        NotImplementedError : if twice the sum of the scaled k-points is
            not an integer vector (tolerance 1e-5).
    '''
    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time replacement and keeps the (cpu, wall) tuple layout.
    cpu0 = (time.process_time(), time.time())
    cell = self.cell
    kpts = self.kpts

    # Symmetry test on the k-point set: 2 * sum(k_scaled) must be integer.
    k_scaled = cell.get_scaled_kpts(kpts).sum(axis=0)
    k_mod_to_half = k_scaled * 2 - (k_scaled * 2).round(0)
    if abs(k_mod_to_half).sum() > 1e-5:
        raise NotImplementedError('k-points must be symmetryic')

    if omega is not None:
        self.omega = omega

    if self.omega is None:
        # Search a proper range-separation parameter omega that can balance
        # the computational cost between the real space integrals and
        # moment space integrals
        self.omega, self.mesh, self.ke_cutoff = _guess_omega(
            cell, kpts, self.mesh)
    else:
        self.ke_cutoff = aft.estimate_ke_cutoff_for_omega(cell, self.omega)
        self.mesh = pbctools.cutoff_to_mesh(cell.lattice_vectors(),
                                            self.ke_cutoff)

    logger.info(self, 'omega = %.15g ke_cutoff = %s mesh = %s', self.omega,
                self.ke_cutoff, self.mesh)

    if direct_scf_tol is None:
        direct_scf_tol = cell.precision**1.5
        logger.debug(self, 'Set direct_scf_tol %g', direct_scf_tol)

    self.cell_rs = cell_rs = _re_contract_cell(cell, self.ke_cutoff)
    self.bvk_kmesh = kmesh = k2gamma.kpts_to_kmesh(cell_rs, kpts)
    bvkcell, phase = k2gamma.get_phase(cell_rs, kpts, kmesh)
    self.bvkmesh_Ls = Ks = k2gamma.translation_vectors_for_kmesh(
        cell_rs, kmesh)
    self.bvkcell = bvkcell
    self.phase = phase

    # Given ke_cutoff, eta corresponds to the most steep Gaussian basis
    # of which the Coulomb integrals can be accurately computed in moment
    # space.
    eta = aft.estimate_eta_for_ke_cutoff(cell,
                                         self.ke_cutoff,
                                         precision=cell.precision)
    # * Assuming the most steep function in smooth basis has exponent eta,
    #   with attenuation parameter omega, rcut_sr is the distance of which
    #   the value of attenuated Coulomb integrals of four shells |eta> is
    #   smaller than the required precision.
    # * The attenuated coulomb integrals between four s-type Gaussians
    #   (2*a/pi)^{3/4}exp(-a*r^2) is
    #     (erfc(omega*a^0.5/(omega^2+a)^0.5*R) - erfc(a^0.5*R)) / R
    #   if two Gaussians on one center and the other two on another center
    #   and the distance between the two centers are R.
    # * The attenuated coulomb integrals between two spherical charge
    #   distributions is
    #     ~(pi/eta)^3/2 (erfc(tau*(eta/2)^0.5*R) - erfc((eta/2)^0.5*R)) / R
    #       tau = omega/sqrt(omega^2 + eta/2)
    #   if the spherical charge distribution is the product of above s-type
    #   Gaussian with exponent eta and a very smooth function.
    #   When R is large, the attenuated Coulomb integral is
    #     ~= (pi/eta)^3/2 erfc(tau*(eta/2)^0.5*R) / R
    #     ~= pi/(tau*eta^2*R^2) exp(-tau^2*eta*R^2/2)
    tau = self.omega / (self.omega**2 + eta / 2)**.5
    rcut_sr = 10  # initial guess
    # One refinement step of rcut_sr starting from the initial guess.
    rcut_sr = (-np.log(direct_scf_tol * tau * (eta * rcut_sr)**2 / np.pi) /
               (tau**2 * eta / 2))**.5
    logger.debug(self, 'eta = %g rcut_sr = %g', eta, rcut_sr)

    # Ls is the translation vectors to mimic periodicity of a cell
    Ls = bvkcell.get_lattice_Ls(rcut=cell.rcut + rcut_sr)
    # Sort the translation vectors by their length.
    self.supmol_Ls = Ls = Ls[np.linalg.norm(Ls, axis=1).argsort()]

    supmol = _make_extended_mole(cell_rs, Ls, Ks, self.omega,
                                 direct_scf_tol)
    self.supmol = supmol

    nkpts = len(self.bvkmesh_Ls)
    nbas = cell_rs.nbas
    n_steep, n_local, n_diffused = cell_rs._nbas_each_set
    n_compact = n_steep + n_local
    bas_mask = supmol._bas_mask

    self.bvk_bas_mask = bvk_bas_mask = bas_mask.any(axis=2)
    # Some basis in bvk-cell are not presented in the supmol. They can be
    # skipped when computing SR integrals
    self.bvkcell._bas = bvkcell._bas[bvk_bas_mask.ravel()]

    # Record the mapping between the dense bvkcell basis and the
    # original sparse bvkcell basis
    bvk_cell_idx = np.repeat(np.arange(nkpts)[:, None], nbas, axis=1)
    self.bvk_cell_id = bvk_cell_idx[bvk_bas_mask].astype(np.int32)
    cell0_shl_idx = np.repeat(np.arange(nbas)[None, :], nkpts, axis=0)
    self.cell0_shl_id = cell0_shl_idx[bvk_bas_mask].astype(np.int32)

    logger.timer_debug1(self, 'initializing supmol', *cpu0)
    logger.info(self, 'sup-mol nbas = %d cGTO = %d pGTO = %d', supmol.nbas,
                supmol.nao, supmol.npgto_nr())

    supmol.omega = -self.omega  # Set short range coulomb

    with supmol.with_integral_screen(direct_scf_tol**2):
        vhfopt = _vhf.VHFOpt(supmol,
                             'int2e_sph',
                             qcondname=libpbc.PBCVHFsetnr_direct_scf)
    vhfopt.direct_scf_tol = direct_scf_tol
    self.vhfopt = vhfopt
    # NOTE: this timer is measured from function entry (cpu0), so it
    # includes the supmol setup reported just above.
    logger.timer(self, 'initializing vhfopt', *cpu0)

    q_cond = vhfopt.get_q_cond((supmol.nbas, supmol.nbas))
    idx = supmol._images_loc
    bvk_q_cond = lib.condense('NP_absmax', q_cond, idx, idx)
    ovlp_mask = bvk_q_cond > direct_scf_tol
    # Remove diffused-diffused block
    if n_diffused > 0:
        diffused_mask = np.zeros_like(bvk_bas_mask)
        diffused_mask[:, n_compact:] = True
        diffused_mask = diffused_mask[bvk_bas_mask]
        ovlp_mask[diffused_mask[:, None] & diffused_mask] = False
    self.ovlp_mask = ovlp_mask.astype(np.int8)

    # mute rcut_threshold, divide basis into two sets only
    cell_lr_aft = _re_contract_cell(cell, self.ke_cutoff, -1, verbose=0)
    self.lr_aft = lr_aft = _LongRangeAFT(cell_lr_aft, kpts, self.omega,
                                         self.bvk_kmesh)
    lr_aft.ke_cutoff = self.ke_cutoff
    lr_aft.mesh = self.mesh
    lr_aft.eta = eta
    return self
def get_ontop_pair_density(ot,
                           rho,
                           ao,
                           oneCDMs,
                           twoCDM_amo,
                           ao2amo,
                           deriv=0,
                           non0tab=None):
    r''' Pi(r) = i(r)*j(r)*k(r)*l(r)*d_ijkl / 2
               = rho[0](r)*rho[1](r) + i(r)*j(r)*k(r)*l(r)*l_ijkl / 2

    Args:
        ot : on-top pair density functional object
        rho : ndarray of shape (2,*,ngrids)
            contains spin density [and derivatives]
        ao : ndarray of shape (*, ngrids, nao)
            contains values of aos [and derivatives]
        oneCDMs : ndarray of shape (2, nao, nao)
            contains spin-separated 1-RDM
        twoCDM_amo : ndarray of shape (mc.ncas, mc.ncas, mc.ncas, mc.ncas)
            contains spin-summed two-body cumulant density matrix in active
            space
        ao2amo : ndarray of shape (nao, ncas)
            molecular-orbital coefficients for active-space orbitals

    Kwargs:
        deriv : derivative order through which to calculate. Default is 0.
            deriv > 1 not implemented
        non0tab : as in pyscf.dft.gen_grid and pyscf.dft.numint

    Returns : ndarray of shape (*,ngrids)
        The on-top pair density and its derivatives if requested
        deriv = 0 : value (1d array)
        deriv = 1 : value, d/dx, d/dy, d/dz
        deriv = 2 : value, d/dx, d/dy, d/dz, d^2/d|r1-r2|^2_(r1=r2)

    Note:
        When ``ot.verbose > logger.DEBUG`` a slow einsum-based reference is
        evaluated alongside the production path and the difference of the
        two is logged.
    '''
    # Fix dimensionality of rho and ao
    if rho.ndim == 2:
        rho = rho.reshape(rho.shape[0], 1, rho.shape[1])
    if ao.ndim == 2:
        ao = ao.reshape(1, ao.shape[0], ao.shape[1])

    # Debug code for ultra-slow, ultra-high-memory but very safe
    # implementation
    if ot.verbose > logger.DEBUG:
        logger.debug(
            ot,
            'Warning: memory-intensive cacheing of full 2RDM for testing '
            'purposes initiated; reduce verbosity to increase speed and memory efficiency'
        )
        twoRDM = represent_operator_in_basis(twoCDM_amo,
                                             ao2amo.conjugate().T)
        twoRDM = get_2RDM_from_2CDM(twoRDM, oneCDMs)

    # First cumulant and derivatives (chain rule! product rule!)
    t0 = (time.process_time(), time.time())
    Pi = np.zeros_like(rho[0])
    Pi[0] = rho[0, 0] * rho[1, 0]
    if deriv > 0:
        assert (rho.shape[1] >= 4), rho.shape
        assert (ao.shape[0] >= 4), ao.shape
        for ideriv in range(1, 4):
            Pi[ideriv] = rho[0, ideriv] * rho[1, 0] + rho[0, 0] * rho[
                1, ideriv]
    if deriv > 1:
        assert (rho.shape[1] >= 6), rho.shape
        assert (ao.shape[0] >= 10), ao.shape
        Pi[4] = -(rho[:, 1:4].sum(0).conjugate() *
                  rho[:, 1:4].sum(0)).sum(0) / 4
        Pi[4] += rho[0, 0] * (rho[1, 4] / 4 + rho[0, 5] * 2)
        Pi[4] += rho[1, 0] * (rho[0, 4] / 4 + rho[1, 5] * 2)
    t0 = logger.timer_debug1(ot, 'otpd first cumulant', *t0)

    # Second cumulant and derivatives (chain rule! product rule!)
    # dot, tensordot, and sum are hugely faster than np.einsum
    # but whether or when they actually multithread is unclear
    # Update 05/11/2020: ao is actually stored in row-major order
    # = (deriv,AOs,grids).
    #grid2amo_ref = np.tensordot (ao, ao2amo, axes=1) #np.einsum ('ijk,kl->ijl', ao, ao2amo)
    grid2amo = _grid_ao2mo(ot.mol, ao, ao2amo, non0tab=non0tab)
    t0 = logger.timer(ot, 'otpd ao2mo', *t0)
    gridkern = np.zeros(grid2amo.shape + (grid2amo.shape[2], ),
                        dtype=grid2amo.dtype)
    # r_0ai, r_0aj -> r_0aij
    gridkern[0] = (grid2amo[0, :, :, np.newaxis] *
                   grid2amo[0, :, np.newaxis, :])
    # r_0aij, P_ijkl -> P_0akl
    wrk0 = np.tensordot(gridkern[0], twoCDM_amo, axes=2)
    # r_0aij, P_0aij -> P_0a
    Pi[0] += (gridkern[0] * wrk0).sum((1, 2)) / 2
    t0 = logger.timer_debug1(ot, 'otpd second cumulant 0th derivative', *t0)
    if ot.verbose > logger.DEBUG:
        logger.debug(
            ot,
            'Warning: slow einsum-based testing calculation of Pi initiated; '
            'reduce verbosity to increase speed and memory efficiency')
        test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[0], ao[0],
                            ao[0], ao[0]) / 2
        logger.debug(ot, "Pi, |tensordot_formula - einsum_formula| = %s",
                     linalg.norm(Pi[0] - test_Pi))
        # The label has no placeholder; the former `.format(ideriv)` call
        # raised NameError when deriv == 0 because `ideriv` is only bound
        # inside the derivative loops.
        t0 = logger.timer(ot, 'otpd 0th derivative debug', *t0)

    if deriv > 0:
        for ideriv in range(1, 4):
            # Fourfold tensor symmetry ijkl = klij = jilk = lkji & product
            # rule -> factor of 4
            # r_1ai, r_0aj -> r_1aij
            gridkern[ideriv] = (grid2amo[ideriv, :, :, np.newaxis] *
                                grid2amo[0, :, np.newaxis, :])
            # r_1aij, P_0aij -> P_1a
            Pi[ideriv] += (gridkern[ideriv] * wrk0).sum((1, 2)) * 2
            t0 = logger.timer_debug1(
                ot,
                'otpd second cumulant 1st derivative ({})'.format(ideriv),
                *t0)
            if ot.verbose > logger.DEBUG:
                logger.debug(
                    ot,
                    'Warning: slow einsum-based testing calculation of Pi\'s first derivatives initiated; '
                    'reduce verbosity to increase speed and memory efficiency'
                )
                test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM,
                                    ao[ideriv], ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,aj,ai,ak,al->a', twoRDM,
                                     ao[ideriv], ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,ak,ai,aj,al->a', twoRDM,
                                     ao[ideriv], ao[0], ao[0], ao[0]) / 2
                test_Pi += np.einsum('ijkl,al,ai,aj,ak->a', twoRDM,
                                     ao[ideriv], ao[0], ao[0], ao[0]) / 2
                logger.debug(
                    ot,
                    "Pi derivative, |tensordot_formula - einsum_formula| = %s",
                    linalg.norm(Pi[ideriv] - test_Pi))
                t0 = logger.timer(
                    ot, 'otpd 1st derivative ({}) debug'.format(ideriv),
                    *t0)

    if deriv > 1:
        # The fifth slot is allocated to the "off-top Laplacian," i.e.,
        # nabla_(r1-r2)^2 Pi(r1,r2)|(r1=r2)
        # nabla_off^2 Pi = 1/2 d^ik_jl * ([nabla_r^2 phi_i] phi_j phi_k phi_l
        #   + {1 - p_jk - p_jl}[nabla_r phi_i . nabla_r phi_j] phi_k phi_l)
        # using four-fold symmetry a lot! be careful!
        if ot.verbose > logger.DEBUG:
            # Snapshot of the first-cumulant part, used by the
            # second-cumulant-only check at the end of this branch.
            test2_Pi = Pi[4].copy()
        XX, YY, ZZ = 4, 7, 9
        # r_2ai, r_0aj -> r_2aij
        gridkern[4] = grid2amo[[XX, YY, ZZ], :, :, np.newaxis].sum(
            0) * grid2amo[0, :, np.newaxis, :]
        # r_1ai, r_1aj -> r_2aij
        gridkern[4] += (grid2amo[1:4, :, :, np.newaxis] *
                        grid2amo[1:4, :, np.newaxis, :]).sum(0)
        # r_1aij, P_ijkl -> P_1akl
        wrk1 = np.tensordot(gridkern[1:4], twoCDM_amo, axes=2)
        # r_2aij, P_0aij -> P_2a
        Pi[4] += (gridkern[4] * wrk0).sum((1, 2)) / 2
        # r_1aij, P_1aij -> P_2a
        Pi[4] -= ((gridkern[1:4] + gridkern[1:4].transpose(0, 1, 3, 2)) *
                  wrk1).sum((0, 2, 3)) / 2
        t0 = logger.timer(ot, 'otpd second cumulant off-top Laplacian', *t0)
        if ot.verbose > logger.DEBUG:
            logger.debug(
                ot,
                'Warning: slow einsum-based testing calculation of Pi\'s second derivatives initiated; '
                'reduce verbosity to increase speed and memory efficiency')
            X, Y, Z = 1, 2, 3
            test_Pi = np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[XX],
                                ao[0], ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[YY],
                                 ao[0], ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[ZZ],
                                 ao[0], ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X],
                                 ao[X], ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y],
                                 ao[Y], ao[0], ao[0]) / 2
            test_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z],
                                 ao[Z], ao[0], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X],
                                 ao[0], ao[X], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y],
                                 ao[0], ao[Y], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z],
                                 ao[0], ao[Z], ao[0]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[X],
                                 ao[0], ao[0], ao[X]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Y],
                                 ao[0], ao[0], ao[Y]) / 2
            test_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoRDM, ao[Z],
                                 ao[0], ao[0], ao[Z]) / 2
            logger.debug(
                ot,
                'Pi off-top Laplacian, |tensordot formula - einsum_formula| = %s',
                linalg.norm(Pi[4] - test_Pi))

            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[XX], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[YY], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[ZZ], grid2amo[0], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[X], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[Y], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi += np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[Z], grid2amo[0],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[0], grid2amo[X],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[0], grid2amo[Y],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[0], grid2amo[Z],
                                  grid2amo[0]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[X], grid2amo[0], grid2amo[0],
                                  grid2amo[X]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Y], grid2amo[0], grid2amo[0],
                                  grid2amo[Y]) / 2
            test2_Pi -= np.einsum('ijkl,ai,aj,ak,al->a', twoCDM_amo,
                                  grid2amo[Z], grid2amo[0], grid2amo[0],
                                  grid2amo[Z]) / 2
            logger.debug(
                ot,
                'Pi off-top Laplacian, testing second cumulant only |tensordot formula - einsum_formula| = %s',
                linalg.norm(Pi[4] - test2_Pi))
            t0 = logger.timer(ot, 'otpd off-top Laplacian debug', *t0)

    # Unfix dimensionality of Pi. The local views of rho and ao are not
    # returned, so they are no longer reshaped back here: that reshape had
    # no visible effect and failed whenever ao carried more leading rows
    # than rho.
    if Pi.shape[0] == 1:
        Pi = Pi.reshape(Pi.shape[1])
    return Pi
def get_jk_favorj(sgx,
                  dm,
                  hermi=1,
                  with_j=True,
                  with_k=True,
                  direct_scf_tol=1e-13):
    '''Build J and K matrices on the SGX grid, block by block.

    Args:
        sgx : object providing mol, grids, grids_thrd, debug, max_memory
            and blockdim.
        dm : density matrix or stack of density matrices; the last axis
            length is taken as nao.
        hermi : when equal to 1 (and with_k is set) the K matrices are
            symmetrized before being returned.
        with_j, with_k : whether the J / K contributions are accumulated.
        direct_scf_tol : forwarded to ``_gen_jk_direct``.

    Returns:
        (vj, vk), each reshaped to the shape of ``dm``.

    Side effects:
        ``mol._bvv`` is overwritten inside the grid loop with the shell
        indices that survive the screening.
    '''
    # time.clock() was removed in Python 3.8; use time.process_time() for
    # the CPU component of every (cpu, wall) pair in this function.
    t0 = time.process_time(), time.time()
    mol = sgx.mol
    grids = sgx.grids
    gthrd = sgx.grids_thrd

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]

    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol)

    # for basis set to shell
    intor = mol._add_suffix('int3c2e')
    fakemol = gto.fakemol_for_charges(grids.coords)
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      fakemol._atm, fakemol._bas,
                                      fakemol._env)
    ao_loc = moleintor.make_loc(bas, intor)
    # rao_loc[j] = index of the shell that AO j belongs to.
    rao_loc = numpy.zeros((nao), dtype=int)
    for i in range(mol.nbas):
        for j in range(ao_loc[i], ao_loc[i + 1]):
            rao_loc[j] = i

    sn = numpy.zeros((nao, nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    sblk = sgx.blockdim
    blksize = min(ngrids,
                  max(4, int(min(sblk, max_memory * 1e6 / 8 / nao**2))))
    # First pass over the grid: accumulate the numerical overlap sn.
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]
        sn += lib.dot(ao.T, wao)

    ovlp = mol.intor_symmetric('int1e_ovlp')
    # proj solves sn @ proj = ovlp; it is applied to the density matrices.
    proj = scipy.linalg.solve(sn, ovlp)
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)

    t1 = logger.timer_debug1(mol, "sgX initialziation", *t0)

    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0
    # Second pass over the grid: screening and J/K accumulation.
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1, None]

        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            gmaxfg = numpy.amax(numpy.absolute(fg[i]), axis=1)
            gmaxwao_v = numpy.amax(numpy.absolute(ao), axis=1)
            gmaxtt = gmaxfg * gmaxwao_v
            # NOTE(review): numpy.any without `axis` reduces to one bool,
            # so this mask is all-or-nothing for the block; confirm that
            # a per-grid-point test was not intended.
            mask |= numpy.any(gmaxtt > 1e-7)
            mask |= numpy.any(gmaxtt < -1e-7)
        if not numpy.all(mask):
            ao = ao[mask]
            wao = wao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        # screening u by value of grids
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            udms = proj_dm[0][usi, :]
            # screening v by dm and ovlp then triangle matrix bn
            dmaxg = numpy.amax(numpy.absolute(udms), axis=0)
            dsi = numpy.argwhere(dmaxg > 1e-4).reshape(-1)
            vsi = numpy.intersect1d(dsi, osi)
            if len(vsi) != 0:
                vsh = numpy.unique(rao_loc[vsi])
                mol._bvv = vsh

        # screening u by value of grids
        umaxg = numpy.amax(numpy.absolute(wao), axis=0)
        usi = numpy.argwhere(umaxg > 1e-7).reshape(-1)
        if len(usi) != 0:
            # screening v by ovlp
            uovl = ovlp[usi, :]
            vmaxu = numpy.amax(numpy.absolute(uovl), axis=0)
            osi = numpy.argwhere(vmaxu > 1e-4).reshape(-1)
            if len(osi) != 0:
                vsh = numpy.unique(rao_loc[osi])
                #print(vsh.shape,'eew',vsh)
                mol._bvv = vsh

        # Recompute fg on the (possibly reduced) grid block and drop the
        # grid points whose entries all lie within +-gthrd.
        fg = lib.einsum('gi,xij->xgj', wao, proj_dm)
        mask = numpy.zeros(i1 - i0, dtype=bool)
        for i in range(nset):
            mask |= numpy.any(fg[i] > gthrd, axis=1)
            mask |= numpy.any(fg[i] < -gthrd, axis=1)
        if not numpy.all(mask):
            ao = ao[mask]
            fg = fg[:, mask]
            coords = coords[mask]

        if with_j:
            rhog = numpy.einsum('xgu,gu->xg', fg, ao)
        else:
            rhog = None

        # tnuc accumulates the (cpu, wall) time spent inside the integral
        # call: subtract the clock before the call, add it back after.
        if sgx.debug:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xgt->xgv', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - time.process_time(), tnuc[1] - time.time()
            jpart, gv = batch_jk(mol, coords, rhog, fg)
            tnuc = tnuc[0] + time.process_time(), tnuc[1] + time.time()

        if with_j:
            vj += jpart
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,gv->uv', ao, gv[i])
        jpart = gv = None

    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(
        sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
        'for tensor contraction (%.2f, %.2f)', tnuc[0], tnuc[1], tdot[0],
        tdot[1])

    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1)) * .5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def _make_eris_outcore(mycc, mo_coeff=None):
    '''Transform the two-electron integrals to the MO basis and store the
    blocks as datasets of a temporary HDF5 file.

    Lower-case letters label the first orbital set (``eris.mo_coeff[0]``),
    upper-case letters the second (``eris.mo_coeff[1]``); ``o``/``v`` are
    the columns before / after the occupied count taken from ``mycc.nocc``.

    Args:
        mycc : object providing nocc, nmo, mol and direct.
        mo_coeff : forwarded to ``eris._common_init_``.

    Returns:
        The populated ``_ChemistsERIs`` object. The vvvv / VVVV / vvVV
        datasets are only generated when ``mycc.direct`` is false.
    '''
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    nocca, noccb = mycc.nocc
    nmoa, nmob = mycc.nmo
    nvira, nvirb = nmoa-nocca, nmob-noccb

    moa = eris.mo_coeff[0]
    mob = eris.mo_coeff[1]
    # From here on the orbital counts come from the coefficient matrices.
    nmoa = moa.shape[1]
    nmob = mob.shape[1]

    orboa = moa[:,:nocca]
    orbob = mob[:,:noccb]
    orbva = moa[:,nocca:]
    orbvb = mob[:,noccb:]

    eris.feri = lib.H5TmpFile()
    # The ...vvv blocks store the last two indices as a packed triangle.
    eris.oooo = eris.feri.create_dataset('oooo', (nocca,nocca,nocca,nocca), 'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocca,nvira,nocca,nocca), 'f8')
    eris.ovov = eris.feri.create_dataset('ovov', (nocca,nvira,nocca,nvira), 'f8')
    eris.oovv = eris.feri.create_dataset('oovv', (nocca,nocca,nvira,nvira), 'f8')
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocca,nvira,nvira,nocca), 'f8')
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocca,nvira,nvira*(nvira+1)//2), 'f8')
    #eris.vvvv = eris.feri.create_dataset('vvvv', (nvira,nvira,nvira,nvira), 'f8')
    eris.OOOO = eris.feri.create_dataset('OOOO', (noccb,noccb,noccb,noccb), 'f8')
    eris.OVOO = eris.feri.create_dataset('OVOO', (noccb,nvirb,noccb,noccb), 'f8')
    eris.OVOV = eris.feri.create_dataset('OVOV', (noccb,nvirb,noccb,nvirb), 'f8')
    eris.OOVV = eris.feri.create_dataset('OOVV', (noccb,noccb,nvirb,nvirb), 'f8')
    eris.OVVO = eris.feri.create_dataset('OVVO', (noccb,nvirb,nvirb,noccb), 'f8')
    eris.OVVV = eris.feri.create_dataset('OVVV', (noccb,nvirb,nvirb*(nvirb+1)//2), 'f8')
    #eris.VVVV = eris.feri.create_dataset('VVVV', (nvirb,nvirb,nvirb,nvirb), 'f8')
    eris.ooOO = eris.feri.create_dataset('ooOO', (nocca,nocca,noccb,noccb), 'f8')
    eris.ovOO = eris.feri.create_dataset('ovOO', (nocca,nvira,noccb,noccb), 'f8')
    eris.ovOV = eris.feri.create_dataset('ovOV', (nocca,nvira,noccb,nvirb), 'f8')
    eris.ooVV = eris.feri.create_dataset('ooVV', (nocca,nocca,nvirb,nvirb), 'f8')
    eris.ovVO = eris.feri.create_dataset('ovVO', (nocca,nvira,nvirb,noccb), 'f8')
    eris.ovVV = eris.feri.create_dataset('ovVV', (nocca,nvira,nvirb*(nvirb+1)//2), 'f8')
    #eris.vvVV = eris.feri.create_dataset('vvVV', (nvira,nvira,nvirb,nvirb), 'f8')
    eris.OVoo = eris.feri.create_dataset('OVoo', (noccb,nvirb,nocca,nocca), 'f8')
    eris.OOvv = eris.feri.create_dataset('OOvv', (noccb,noccb,nvira,nvira), 'f8')
    eris.OVvo = eris.feri.create_dataset('OVvo', (noccb,nvirb,nvira,nocca), 'f8')
    eris.OVvv = eris.feri.create_dataset('OVvv', (noccb,nvirb,nvira*(nvira+1)//2), 'f8')

    # time.clock() was removed in Python 3.8; time.process_time() is the
    # CPU-time replacement.
    cput1 = time.process_time(), time.time()
    mol = mycc.mol
    # <ij||pq> = <ij|pq> - <ij|qp> = (ip|jq) - (iq|jp)
    tmpf = lib.H5TmpFile()
    # Each pass transforms one spin combination into tmpf, slices it one
    # occupied index at a time, then deletes the temporary dataset.
    if nocca > 0:
        ao2mo.general(mol, (orboa,moa,moa,moa), tmpf, 'aa')
        buf = np.empty((nmoa,nmoa,nmoa))
        for i in range(nocca):
            lib.unpack_tril(tmpf['aa'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.oooo[i] = buf[:nocca,:nocca,:nocca]
            eris.ovoo[i] = buf[nocca:,:nocca,:nocca]
            eris.ovov[i] = buf[nocca:,:nocca,nocca:]
            eris.oovv[i] = buf[:nocca,nocca:,nocca:]
            eris.ovvo[i] = buf[nocca:,nocca:,:nocca]
            eris.ovvv[i] = lib.pack_tril(buf[nocca:,nocca:,nocca:])
        del(tmpf['aa'])

    if noccb > 0:
        buf = np.empty((nmob,nmob,nmob))
        ao2mo.general(mol, (orbob,mob,mob,mob), tmpf, 'bb')
        for i in range(noccb):
            lib.unpack_tril(tmpf['bb'][i*nmob:(i+1)*nmob], out=buf)
            eris.OOOO[i] = buf[:noccb,:noccb,:noccb]
            eris.OVOO[i] = buf[noccb:,:noccb,:noccb]
            eris.OVOV[i] = buf[noccb:,:noccb,noccb:]
            eris.OOVV[i] = buf[:noccb,noccb:,noccb:]
            eris.OVVO[i] = buf[noccb:,noccb:,:noccb]
            eris.OVVV[i] = lib.pack_tril(buf[noccb:,noccb:,noccb:])
        del(tmpf['bb'])

    if nocca > 0:
        buf = np.empty((nmoa,nmob,nmob))
        ao2mo.general(mol, (orboa,moa,mob,mob), tmpf, 'ab')
        for i in range(nocca):
            lib.unpack_tril(tmpf['ab'][i*nmoa:(i+1)*nmoa], out=buf)
            eris.ooOO[i] = buf[:nocca,:noccb,:noccb]
            eris.ovOO[i] = buf[nocca:,:noccb,:noccb]
            eris.ovOV[i] = buf[nocca:,:noccb,noccb:]
            eris.ooVV[i] = buf[:nocca,noccb:,noccb:]
            eris.ovVO[i] = buf[nocca:,noccb:,:noccb]
            eris.ovVV[i] = lib.pack_tril(buf[nocca:,noccb:,noccb:])
        del(tmpf['ab'])

    if noccb > 0:
        buf = np.empty((nmob,nmoa,nmoa))
        ao2mo.general(mol, (orbob,mob,moa,moa), tmpf, 'ba')
        for i in range(noccb):
            lib.unpack_tril(tmpf['ba'][i*nmob:(i+1)*nmob], out=buf)
            eris.OVoo[i] = buf[noccb:,:nocca,:nocca]
            eris.OOvv[i] = buf[:noccb,nocca:,nocca:]
            eris.OVvo[i] = buf[noccb:,nocca:,:nocca]
            eris.OVvv[i] = lib.pack_tril(buf[noccb:,nocca:,nocca:])
        del(tmpf['ba'])
    buf = None
    cput1 = logger.timer_debug1(mycc, 'transforming oopq, ovpq', *cput1)

    if not mycc.direct:
        ao2mo.full(mol, orbva, eris.feri, dataname='vvvv')
        ao2mo.full(mol, orbvb, eris.feri, dataname='VVVV')
        ao2mo.general(mol, (orbva,orbva,orbvb,orbvb), eris.feri, dataname='vvVV')
        eris.vvvv = eris.feri['vvvv']
        eris.VVVV = eris.feri['VVVV']
        eris.vvVV = eris.feri['vvVV']
        cput1 = logger.timer_debug1(mycc, 'transforming vvvv', *cput1)

    return eris
def get_k_e1_kpts(mydf,
                  dm_kpts,
                  kpts=np.zeros((1, 3)),
                  kpts_band=None,
                  exxdiv=None):
    '''Derivatives of exchange (K) AO matrix at sampled k-points.

    Args:
        mydf : object providing cell, mesh, grids, _numint and max_memory.
        dm_kpts : density matrices; when it carries ``mo_coeff`` and
            ``mo_occ`` attributes and only one set is present, the
            occupied orbitals are used in place of the density matrix.
        kpts : k-points of the density matrices.
        kpts_band : optional k-points at which the result is evaluated.
        exxdiv : passed to ``tools.get_coulG`` unless it is 'ewald' or
            None, in which case False is passed instead.

    Returns:
        Array whose leading axis has length 3 (one slice per derivative
        component), each slice formatted by ``_format_jks``.

    Raises:
        NotImplementedError : if ``exxdiv == 'ewald'`` and
            ``cell.omega != 0``.
    '''
    cell = mydf.cell
    mesh = mydf.mesh
    ngrids = cell.gen_uniform_grids(mesh).shape[0]

    # Grab the orbital data before dm_kpts is converted to a plain array.
    mo_coeff = getattr(dm_kpts, 'mo_coeff', None)
    if mo_coeff is not None:
        mo_occ = dm_kpts.mo_occ

    kpts = np.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1. / nkpts * (cell.vol / ngrids)

    input_band = kpts_band
    kpts_band = _format_kpts_band(input_band, kpts)
    nband = len(kpts_band)

    # Real output only when both k-point sets are at the gamma point.
    if gamma_point(kpts_band) and gamma_point(kpts):
        out_dtype = dms.dtype
    else:
        out_dtype = np.complex128
    vk_kpts = np.zeros((3, nset, nband, nao, nao), dtype=out_dtype)

    coords = mydf.grids.coords

    if input_band is None:
        # One evaluation serves both roles: slice 0 is the AO value.
        ao1_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts, deriv=1)
        ]
        ao2_kpts = [ao1_kpt[0] for ao1_kpt in ao1_kpts]
    else:
        ao2_kpts = [
            np.asarray(ao.T, order='C')
            for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)
        ]
        ao1_kpts = [
            np.asarray(ao.transpose(0, 2, 1), order='C') for ao in
            mydf._numint.eval_ao(cell, coords, kpts=kpts_band, deriv=1)
        ]

    if mo_coeff is not None and nset == 1:
        # Occupied orbitals scaled by sqrt(occupation), then evaluated on
        # the grid.
        mo_coeff = [
            mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0])
            for k, occ in enumerate(mo_occ)
        ]
        ao2_kpts = [
            np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)
        ]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    # NOTE(review): mem_now is subtracted a second time here; kept as is,
    # confirm whether this is intended.
    free_bytes = (max_memory - mem_now) * 1e6
    blksize = int(min(nao, max(1, free_bytes / 16 / 4 / 3 / ngrids / nao)))
    logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s blksize %d',
                  max_memory, blksize)

    vR_dm = np.empty((3, nset, nao, ngrids), dtype=vk_kpts.dtype)
    real_output = vR_dm.dtype == np.double

    t1 = (logger.process_clock(), logger.perf_counter())
    for k2, ao2T in enumerate(ao2_kpts):
        if ao2T.size == 0:
            continue

        kpt2 = kpts[k2]
        naoj = ao2T.shape[0]
        if mo_coeff is None or nset > 1:
            ao_dms = [
                lib.dot(dms[iset, k2], ao2T.conj()) for iset in range(nset)
            ]
        else:
            ao_dms = [ao2T.conj()]

        for k1, ao1T in enumerate(ao1_kpts):
            kpt1 = kpts_band[k1]

            # If we have an ewald exxdiv, we add the G=0 correction near
            # the end of the function to bypass any discretization errors
            # that arise from the FFT.
            if exxdiv == 'ewald' or exxdiv is None:
                coulG_exxdiv = False
            else:
                coulG_exxdiv = exxdiv
            coulG = tools.get_coulG(cell, kpt2 - kpt1, coulG_exxdiv, mydf,
                                    mesh)

            if is_zero(kpt1 - kpt2):
                expmikr = np.array(1.)
            else:
                expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1))

            for p0, p1 in lib.prange(0, nao, blksize):
                # Pair density of the derivative AOs (rows 1:) with ao2T.
                rho1 = np.einsum('aig,jg->aijg',
                                 ao1T[1:, p0:p1].conj() * expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1, ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(3, p1 - p0, naoj, ngrids)
                vG = None
                if real_output:
                    vR = vR.real
                for iset in range(nset):
                    np.einsum('aijg,jg->aig',
                              vR,
                              ao_dms[iset],
                              out=vR_dm[:, iset, p0:p1])
                vR = None
            vR_dm *= expmikr.conj()

            for iset in range(nset):
                vk_kpts[:, iset, k1] -= weight * np.einsum(
                    'aig,jg->aij', vR_dm[:, iset], ao1T[0])
        t1 = logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2,
                                 *t1)

    # Ewald correction has no contribution to nuclear gradient unless range
    # separated Coulomb is used.
    # The gradient correction part is not added in the vk matrix
    if exxdiv == 'ewald' and cell.omega != 0:
        # when cell.omega != 0: madelung constant will have a non-zero
        # derivative
        raise NotImplementedError("Range Separated Coulomb")

    return np.asarray(
        [_format_jks(vk, dm_kpts, input_band, kpts) for vk in vk_kpts])
def _load_transformed_eri(mol, mo_coeffs, max_memory):
    '''Run an out-of-core AO->MO transformation and return dataset 'eri_mo'.

    The integrals are written to a scratch file that is removed again before
    returning, so neither the file nor its descriptor is leaked.
    '''
    import os  # local import: the file-level import list is not visible here

    fd, path = tempfile.mkstemp()
    # mkstemp hands back an open descriptor; only the path is used below, so
    # close the descriptor right away instead of leaking it.
    os.close(fd)
    try:
        ao2mo.outcore.general(mol, mo_coeffs, path, max_memory=max_memory,
                              ioblk_size=100, compact=False)
        # NOTE(review): assumes read_dataset returns the data fully loaded in
        # memory (callers reshape/transpose it immediately) - confirm.
        return radc_ao2mo.read_dataset(path, 'eri_mo')
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or the original exception.
        try:
            os.remove(path)
        except OSError:
            pass


def _transform_vvvv_chunk(mol, orb_slice, vir_q, vir_rs, max_memory):
    '''Return (orb_slice, vir_q | vir_rs, vir_rs) integrals with axes 1 and 2
    swapped, as a contiguous 4-index array.'''
    eri = _load_transformed_eri(mol, (orb_slice, vir_q, vir_rs, vir_rs),
                                max_memory)
    eri = eri.reshape(orb_slice.shape[1], vir_q.shape[1],
                      vir_rs.shape[1], vir_rs.shape[1])
    return np.ascontiguousarray(eri.transpose(0, 2, 1, 3))


def transform_integrals_outcore(myadc):
    '''Transform the two-electron integrals to the MO basis out of core.

    Occupied/virtual blocks for the alpha (lower-case), beta (upper-case) and
    mixed-spin combinations are stored as datasets of a temporary HDF5 file.
    For ``myadc.method`` equal to "adc(2)-x" or "adc(3)" the all-virtual
    blocks are additionally generated in chunks over the first virtual index
    and stored through ``radc_ao2mo.write_dataset``.

    Args:
        myadc: ADC object providing ``mo_coeff`` (alpha, beta), ``_nocc``,
            ``mol``, ``method``, ``max_memory``, ``stdout`` and ``verbose``.

    Returns:
        An attribute container holding the HDF5 file (``feri1``), the integral
        datasets and, when generated, the lists ``vvvv_p``, ``VVVV_p``,
        ``vVvV_p`` and ``VvVv_p`` with one entry per chunk.
    '''
    cput0 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(myadc.stdout, myadc.verbose)

    mo_a = myadc.mo_coeff[0]
    mo_b = myadc.mo_coeff[1]
    nmo_a = mo_a.shape[1]
    nmo_b = mo_b.shape[1]

    occ_a = myadc.mo_coeff[0][:, :myadc._nocc[0]]
    occ_b = myadc.mo_coeff[1][:, :myadc._nocc[1]]
    vir_a = myadc.mo_coeff[0][:, myadc._nocc[0]:]
    vir_b = myadc.mo_coeff[1][:, myadc._nocc[1]:]

    nocc_a = occ_a.shape[1]
    nocc_b = occ_b.shape[1]
    nvir_a = vir_a.shape[1]
    nvir_b = vir_b.shape[1]

    # Number of lower-triangular (packed) virtual pairs.
    nvpair_a = nvir_a * (nvir_a + 1) // 2
    nvpair_b = nvir_b * (nvir_b + 1) // 2

    # A function object is used as a plain attribute container.
    eris = lambda: None

    eris.feri1 = lib.H5TmpFile()
    # alpha-alpha
    eris.oooo = eris.feri1.create_dataset(
        'oooo', (nocc_a, nocc_a, nocc_a, nocc_a), 'f8')
    eris.oovv = eris.feri1.create_dataset(
        'oovv', (nocc_a, nocc_a, nvir_a, nvir_a), 'f8',
        chunks=(nocc_a, nocc_a, 1, nvir_a))
    eris.ovoo = eris.feri1.create_dataset(
        'ovoo', (nocc_a, nvir_a, nocc_a, nocc_a), 'f8',
        chunks=(nocc_a, 1, nocc_a, nocc_a))
    eris.ovvo = eris.feri1.create_dataset(
        'ovvo', (nocc_a, nvir_a, nvir_a, nocc_a), 'f8',
        chunks=(nocc_a, 1, nvir_a, nocc_a))
    eris.ovvv = eris.feri1.create_dataset(
        'ovvv', (nocc_a, nvir_a, nvpair_a), 'f8')

    # beta-beta
    eris.OOOO = eris.feri1.create_dataset(
        'OOOO', (nocc_b, nocc_b, nocc_b, nocc_b), 'f8')
    eris.OOVV = eris.feri1.create_dataset(
        'OOVV', (nocc_b, nocc_b, nvir_b, nvir_b), 'f8',
        chunks=(nocc_b, nocc_b, 1, nvir_b))
    eris.OVOO = eris.feri1.create_dataset(
        'OVOO', (nocc_b, nvir_b, nocc_b, nocc_b), 'f8',
        chunks=(nocc_b, 1, nocc_b, nocc_b))
    eris.OVVO = eris.feri1.create_dataset(
        'OVVO', (nocc_b, nvir_b, nvir_b, nocc_b), 'f8',
        chunks=(nocc_b, 1, nvir_b, nocc_b))
    eris.OVVV = eris.feri1.create_dataset(
        'OVVV', (nocc_b, nvir_b, nvpair_b), 'f8')

    # alpha-beta
    eris.ooOO = eris.feri1.create_dataset(
        'ooOO', (nocc_a, nocc_a, nocc_b, nocc_b), 'f8')
    eris.ooVV = eris.feri1.create_dataset(
        'ooVV', (nocc_a, nocc_a, nvir_b, nvir_b), 'f8',
        chunks=(nocc_a, nocc_a, 1, nvir_b))
    eris.ovOO = eris.feri1.create_dataset(
        'ovOO', (nocc_a, nvir_a, nocc_b, nocc_b), 'f8',
        chunks=(nocc_a, 1, nocc_b, nocc_b))
    eris.ovVO = eris.feri1.create_dataset(
        'ovVO', (nocc_a, nvir_a, nvir_b, nocc_b), 'f8',
        chunks=(nocc_a, 1, nvir_b, nocc_b))
    eris.ovVV = eris.feri1.create_dataset(
        'ovVV', (nocc_a, nvir_a, nvpair_b), 'f8')

    # beta-alpha
    eris.OOvv = eris.feri1.create_dataset(
        'OOvv', (nocc_b, nocc_b, nvir_a, nvir_a), 'f8',
        chunks=(nocc_b, nocc_b, 1, nvir_a))
    eris.OVoo = eris.feri1.create_dataset(
        'OVoo', (nocc_b, nvir_b, nocc_a, nocc_a), 'f8',
        chunks=(nocc_b, 1, nocc_a, nocc_a))
    eris.OVvo = eris.feri1.create_dataset(
        'OVvo', (nocc_b, nvir_b, nvir_a, nocc_a), 'f8',
        chunks=(nocc_b, 1, nvir_a, nocc_a))
    eris.OVvv = eris.feri1.create_dataset(
        'OVvv', (nocc_b, nvir_b, nvpair_a), 'f8')

    cput1 = logger.process_clock(), logger.perf_counter()
    mol = myadc.mol
    tmpf = lib.H5TmpFile()

    # Each transformation yields, per occupied orbital i, nmo rows of packed
    # pairs; they are unpacked into ``buf`` and sliced into occ/vir blocks.
    if nocc_a > 0:
        ao2mo.general(mol, (occ_a, mo_a, mo_a, mo_a), tmpf, 'aa')
        buf = np.empty((nmo_a, nmo_a, nmo_a))
        for i in range(nocc_a):
            lib.unpack_tril(tmpf['aa'][i * nmo_a:(i + 1) * nmo_a], out=buf)
            eris.oooo[i] = buf[:nocc_a, :nocc_a, :nocc_a]
            eris.ovoo[i] = buf[nocc_a:, :nocc_a, :nocc_a]
            eris.oovv[i] = buf[:nocc_a, nocc_a:, nocc_a:]
            eris.ovvo[i] = buf[nocc_a:, nocc_a:, :nocc_a]
            eris.ovvv[i] = lib.pack_tril(buf[nocc_a:, nocc_a:, nocc_a:])
        del tmpf['aa']

    if nocc_b > 0:
        buf = np.empty((nmo_b, nmo_b, nmo_b))
        ao2mo.general(mol, (occ_b, mo_b, mo_b, mo_b), tmpf, 'bb')
        for i in range(nocc_b):
            lib.unpack_tril(tmpf['bb'][i * nmo_b:(i + 1) * nmo_b], out=buf)
            eris.OOOO[i] = buf[:nocc_b, :nocc_b, :nocc_b]
            eris.OVOO[i] = buf[nocc_b:, :nocc_b, :nocc_b]
            eris.OOVV[i] = buf[:nocc_b, nocc_b:, nocc_b:]
            eris.OVVO[i] = buf[nocc_b:, nocc_b:, :nocc_b]
            eris.OVVV[i] = lib.pack_tril(buf[nocc_b:, nocc_b:, nocc_b:])
        del tmpf['bb']

    if nocc_a > 0:
        buf = np.empty((nmo_a, nmo_b, nmo_b))
        ao2mo.general(mol, (occ_a, mo_a, mo_b, mo_b), tmpf, 'ab')
        for i in range(nocc_a):
            lib.unpack_tril(tmpf['ab'][i * nmo_a:(i + 1) * nmo_a], out=buf)
            eris.ooOO[i] = buf[:nocc_a, :nocc_b, :nocc_b]
            eris.ovOO[i] = buf[nocc_a:, :nocc_b, :nocc_b]
            eris.ooVV[i] = buf[:nocc_a, nocc_b:, nocc_b:]
            eris.ovVO[i] = buf[nocc_a:, nocc_b:, :nocc_b]
            eris.ovVV[i] = lib.pack_tril(buf[nocc_a:, nocc_b:, nocc_b:])
        del tmpf['ab']

    if nocc_b > 0:
        buf = np.empty((nmo_b, nmo_a, nmo_a))
        ao2mo.general(mol, (occ_b, mo_b, mo_a, mo_a), tmpf, 'ba')
        for i in range(nocc_b):
            lib.unpack_tril(tmpf['ba'][i * nmo_b:(i + 1) * nmo_b], out=buf)
            eris.OVoo[i] = buf[nocc_b:, :nocc_a, :nocc_a]
            eris.OOvv[i] = buf[:nocc_b, nocc_a:, nocc_a:]
            eris.OVvo[i] = buf[nocc_b:, nocc_a:, :nocc_a]
            eris.OVvv[i] = lib.pack_tril(buf[nocc_b:, nocc_a:, nocc_a:])
        del tmpf['ba']

    buf = None
    cput1 = logger.timer_debug1(myadc, 'transforming oopq, ovpq', *cput1)

    ############### forming eris_vvvv ########################################

    if myadc.method in ("adc(2)-x", "adc(3)"):

        cput2 = logger.process_clock(), logger.perf_counter()

        # Strictly-lower-triangular index pairs used to pack the
        # antisymmetrized same-spin blocks.
        ind_vv_g = np.tril_indices(nvir_a, k=-1)
        ind_VV_g = np.tril_indices(nvir_b, k=-1)

        eris.vvvv_p = []
        eris.VVVV_p = []
        eris.vVvV_p = []
        eris.VvVv_p = []

        # Use a quarter of the remaining memory budget; one chunk element
        # costs roughly nvir_a**3 doubles (in MB).
        avail_mem = (myadc.max_memory - lib.current_memory()[0]) * 0.25
        vvv_mem = (nvir_a**3) * 8 / 1e6
        # Guard against division by zero when there are no alpha virtuals and
        # always process at least one orbital per chunk.
        if vvv_mem > 0:
            chnk_size = max(1, int(avail_mem / vvv_mem))
        else:
            chnk_size = 1

        # Slicing past the end is clamped by numpy, so the last (short) chunk
        # needs no special-casing.
        for p in range(0, nvir_a, chnk_size):
            orb_slice = vir_a[:, p:p + chnk_size]
            vvvv = _transform_vvvv_chunk(mol, orb_slice, vir_a, vir_a,
                                         avail_mem)
            # Antisymmetrize in the last two indices, then keep c > d only.
            vvvv -= np.ascontiguousarray(vvvv.transpose(0, 1, 3, 2))
            vvvv = vvvv[:, :, ind_vv_g[0], ind_vv_g[1]]
            vvvv_p = radc_ao2mo.write_dataset(vvvv)
            del vvvv
            eris.vvvv_p.append(vvvv_p)

        for p in range(0, nvir_b, chnk_size):
            orb_slice = vir_b[:, p:p + chnk_size]
            VVVV = _transform_vvvv_chunk(mol, orb_slice, vir_b, vir_b,
                                         avail_mem)
            VVVV -= np.ascontiguousarray(VVVV.transpose(0, 1, 3, 2))
            VVVV = VVVV[:, :, ind_VV_g[0], ind_VV_g[1]]
            VVVV_p = radc_ao2mo.write_dataset(VVVV)
            del VVVV
            eris.VVVV_p.append(VVVV_p)

        for p in range(0, nvir_a, chnk_size):
            orb_slice = vir_a[:, p:p + chnk_size]
            vVvV = _transform_vvvv_chunk(mol, orb_slice, vir_a, vir_b,
                                         avail_mem)
            vVvV = vVvV.reshape(-1, nvir_b, nvir_a * nvir_b)
            vVvV_p = radc_ao2mo.write_dataset(vVvV)
            del vVvV
            eris.vVvV_p.append(vVvV_p)

        for p in range(0, nvir_b, chnk_size):
            orb_slice = vir_b[:, p:p + chnk_size]
            VvVv = _transform_vvvv_chunk(mol, orb_slice, vir_b, vir_a,
                                         avail_mem)
            VvVv = VvVv.reshape(-1, nvir_a, nvir_b * nvir_a)
            VvVv_p = radc_ao2mo.write_dataset(VvVv)
            del VvVv
            eris.VvVv_p.append(VvVv_p)

        cput2 = logger.timer_debug1(myadc, 'transforming vvvv', *cput2)

    log.timer('ADC outcore integral transformation', *cput0)
    return eris