def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    '''Apply a spin-resolved one-electron operator to a CI vector.

    ``f1e[0]`` is packed and passed to ``libfci.FCIcontract_a_1e`` and
    ``f1e[1]`` to ``libfci.FCIcontract_b_1e``.  Both kernels receive the same
    zero-initialised output buffer, which is returned.

    Args:
        f1e: indexable pair of one-electron matrices (index 0, then index 1).
        fcivec: CI vector; converted to a C-ordered ndarray.
        norb, nelec, link_index: forwarded to ``direct_spin1._unpack`` to
            obtain the two link-index tables.

    Returns:
        ndarray created with ``numpy.zeros_like`` on the C-ordered CI vector.
    '''
    civec = numpy.asarray(fcivec, order='C')
    lnk_a, lnk_b = direct_spin1._unpack(norb, nelec, link_index)
    na, nlinka = lnk_a.shape[:2]
    nb, nlinkb = lnk_b.shape[:2]
    assert(civec.size == na*nb)
    result = numpy.zeros_like(civec)

    def _ptr(arr):
        # Raw data pointer handed to the C kernels.
        return arr.ctypes.data_as(ctypes.c_void_p)

    # Index 0 of f1e goes to the "a" kernel, index 1 to the "b" kernel.
    for spin, kernel_name in enumerate(('FCIcontract_a_1e',
                                        'FCIcontract_b_1e')):
        h1_packed = lib.pack_tril(f1e[spin])
        getattr(libfci, kernel_name)(
            _ptr(h1_packed), _ptr(civec), _ptr(result),
            ctypes.c_int(norb), ctypes.c_int(na), ctypes.c_int(nb),
            ctypes.c_int(nlinka), ctypes.c_int(nlinkb),
            _ptr(lnk_a), _ptr(lnk_b))
    return result
def _add_vvvv_full(mycc, t1T, t2T, eris, out=None, with_ovvv=False):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)
    without using symmetry t2[ijab] = t2[jiba] in t2 or Ht2

    Only the AO-direct path (``mycc.direct`` true, ``with_ovvv`` false) is
    implemented; every other combination raises NotImplementedError.

    Args:
        mycc: CC object; ``stdout``, ``verbose``, ``direct``, ``mol`` and
            ``mo_coeff`` are read.
        t1T: transposed singles amplitudes, or None to use ``t2T`` alone.
        t2T: this process's segment of the transposed doubles amplitudes; its
            first three dimensions are read as (nvir_seg, nvir, nocc).
        eris: integral container; only an optional ``mo_coeff`` attribute is
            read here.
        out: optional buffer backing the returned array.
        with_ovvv: must be false on the implemented path.

    Returns:
        Array with the shape of ``t2T``.
    '''
    # time.clock() was removed in Python 3.8.  Prefer the logger's own clocks
    # when it provides them, then the legacy time.clock() where it still
    # exists (unchanged behaviour on old interpreters), then process_time().
    cpu_clock = (getattr(logger, 'process_clock', None)
                 or getattr(time, 'clock', None)
                 or time.process_time)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    time0 = cpu_clock(), wall_clock()
    log = logger.Logger(mycc.stdout, mycc.verbose)

    nvir_seg, nvir, nocc = t2T.shape[:3]
    vloc0, vloc1 = _task_location(nvir, rank)
    nocc2 = nocc*(nocc+1)//2
    # NOTE(review): the reshapes below target (nvir_seg*nvir, nocc2) before
    # pack_tril, and the einsum further down uses four indices on tau.  This
    # looks inconsistent with the (nvir_seg, nvir, nocc, nocc) layout read
    # above -- confirm against a caller before relying on this routine.
    if t1T is None:
        tau = lib.pack_tril(t2T.reshape(nvir_seg*nvir,nocc2))
    else:
        tau = t2T + numpy.einsum('ai,bj->abij', t1T[vloc0:vloc1], t1T)
        tau = lib.pack_tril(tau.reshape(nvir_seg*nvir,nocc2))
    tau = tau.reshape(nvir_seg,nvir,nocc2)

    if mycc.direct:   # AO-direct CCSD
        if with_ovvv:
            raise NotImplementedError
        mo = getattr(eris, 'mo_coeff', None)
        if mo is None:  # If eris does not have the attribute mo_coeff
            mo = _mo_without_core(mycc, mycc.mo_coeff)

        ao_loc = mycc.mol.ao_loc_nr()
        nao, nmo = mo.shape
        ntasks = mpi.pool.size
        task_sh_locs = lib.misc._balanced_partition(ao_loc, ntasks)
        # AO range assigned to this rank.
        ao_loc0 = ao_loc[task_sh_locs[rank]]
        ao_loc1 = ao_loc[task_sh_locs[rank+1]]

        orbv = mo[:,nocc:]
        tau = lib.einsum('abij,pb->apij', tau, orbv)
        tau_priv = numpy.zeros((ao_loc1-ao_loc0,nao,nocc,nocc))
        # The loop variable deliberately rebinds ``tau`` to each block
        # yielded by _rotate_tensor_block.
        for task_id, tau in _rotate_tensor_block(tau):
            loc0, loc1 = _task_location(nvir, task_id)
            tau_priv += lib.einsum('pa,abij->pbij',
                                   orbv[ao_loc0:ao_loc1,loc0:loc1], tau)
        tau = None
        time1 = log.timer_debug1('vvvv-tau mo2ao', *time0)
        buf = _contract_vvvv_t2(mycc, None, tau_priv, task_sh_locs, None, log)
        buf = buf.reshape(tau_priv.shape)
        tau_priv = None
        time1 = log.timer_debug1('vvvv-tau contraction', *time1)

        buf = lib.einsum('apij,pb->abij', buf, orbv)
        Ht2 = numpy.ndarray(t2T.shape, buffer=out)
        Ht2[:] = 0
        for task_id, buf in _rotate_tensor_block(buf):
            ao_loc0 = ao_loc[task_sh_locs[task_id]]
            ao_loc1 = ao_loc[task_sh_locs[task_id+1]]
            Ht2 += lib.einsum('pa,pbij->abij',
                              orbv[ao_loc0:ao_loc1,vloc0:vloc1], buf)

        time1 = log.timer_debug1('vvvv-tau ao2mo', *time1)
    else:
        raise NotImplementedError
    return Ht2.reshape(t2T.shape)
def amplitudes_to_vector(t1, t2, out=None):
    '''Flatten the amplitudes held by this process.

    ``t2`` is transposed with axes (2,3,0,1), its first two axes are merged,
    and the result is passed through ``lib.pack_tril``.

    On rank 0 the result is a 1D vector backed by ``out`` (when given): the
    flattened ``t1.T`` first, then the packed doubles.  On every other rank
    only the ``lib.pack_tril`` result is returned and ``out`` is not used.
    '''
    t2T = t2.transpose(2,3,0,1)
    nvir_seg, nvir, nocc = t2T.shape[:3]

    if rank != 0:
        return lib.pack_tril(t2T.reshape(nvir_seg*nvir,nocc,nocc))

    singles = t1.T
    nov = nocc * nvir
    npair_occ = nocc*(nocc+1)//2
    total = nov + nvir_seg*nvir*npair_occ
    vector = numpy.ndarray(total, t1.dtype, buffer=out)
    vector[:nov] = singles.ravel()
    # Packed doubles are written straight into the tail of the vector.
    lib.pack_tril(t2T.reshape(nvir_seg*nvir,nocc,nocc), out=vector[nov:])
    return vector
def __init__(self, myci, mo_coeff, method='incore'):
    '''Transform the two-electron integrals to the MO basis and store blocks.

    Args:
        myci: CI object; ``mol``, ``_scf``, ``nocc``, ``nmo``, ``mo_coeff``
            and ``verbose`` are read.
        mo_coeff: orbital coefficients, or None to use ``myci.mo_coeff``.
        method: with ``'incore'`` and a cached ``mf._eri`` the cached AO
            integrals are transformed; otherwise they are computed from
            ``mol``.

    Attributes set: ``mo_coeff``, ``oooo``, ``vvoo``, ``vooo``, ``voov``,
    ``vovv`` (lower-triangular packed over the last two indices), ``vvvv``
    (restored with symmetry flag 4) and ``fock``.
    '''
    mol = myci.mol
    mf = myci._scf
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    if mo_coeff is None:
        mo_coeff = myci.mo_coeff
    # Always record the orbitals that were used.  Previously the attribute
    # was only set when the caller passed None.
    self.mo_coeff = mo_coeff

    if (method == 'incore' and mf._eri is not None):
        eri = ao2mo.kernel(mf._eri, mo_coeff, verbose=myci.verbose)
    else:
        eri = ao2mo.kernel(mol, mo_coeff, verbose=myci.verbose)
    # Expand to the full 4-index tensor so the blocks can be sliced out.
    eri = ao2mo.restore(1, eri, nmo)
    eri = eri.reshape(nmo,nmo,nmo,nmo)

    self.oooo = eri[:nocc,:nocc,:nocc,:nocc]
    self.vvoo = eri[nocc:,nocc:,:nocc,:nocc]
    self.vooo = eri[nocc:,:nocc,:nocc,:nocc]
    self.voov = eri[nocc:,:nocc,:nocc,nocc:]
    self.vovv = lib.pack_tril(eri[nocc:,:nocc,nocc:,nocc:].reshape(-1,nvir,nvir))
    self.vvvv = ao2mo.restore(4, eri[nocc:,nocc:,nocc:,nocc:].copy(), nvir)

    # Fock matrix from the mean-field density, rotated into the MO basis.
    dm = mf.make_rdm1()
    vhf = mf.get_veff(mol, dm)
    h1 = mf.get_hcore(mol)
    self.fock = reduce(numpy.dot, (mo_coeff.T, h1 + vhf, mo_coeff))
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    '''Contract the 1-electron Hamiltonian with a FCI vector to get a new FCI
    vector.

    The same packed ``f1e`` is passed first to ``libfci.FCIcontract_a_1e`` and
    then to ``libfci.FCIcontract_b_1e``; both are given one zero-initialised
    output buffer, which is returned.
    '''
    civec = numpy.asarray(fcivec, order='C')
    lnk_a, lnk_b = _unpack(norb, nelec, link_index)
    na, nlinka = lnk_a.shape[:2]
    nb, nlinkb = lnk_b.shape[:2]
    assert(civec.size == na*nb)
    h1_packed = lib.pack_tril(f1e)
    result = numpy.zeros_like(civec)

    def _ptr(arr):
        # Raw data pointer handed to the C kernels.
        return arr.ctypes.data_as(ctypes.c_void_p)

    for kernel_name in ('FCIcontract_a_1e', 'FCIcontract_b_1e'):
        getattr(libfci, kernel_name)(
            _ptr(h1_packed), _ptr(civec), _ptr(result),
            ctypes.c_int(norb), ctypes.c_int(na), ctypes.c_int(nb),
            ctypes.c_int(nlinka), ctypes.c_int(nlinkb),
            _ptr(lnk_a), _ptr(lnk_b))
    return result
def ecp_int(cell, kpts=None):
    '''Evaluate the 'ECPscalar' integrals of ``cell`` with the shell-pair
    tasks distributed through ``mpi.work_stealing_partition``.

    Rank 0 broadcasts the serialized cell; all other ranks rebuild their cell
    from that broadcast, so the ``cell`` argument is ignored there.

    Args:
        cell: cell object (used as given on rank 0 only).
        kpts: None (a single zero k-point is used), one k-point of shape
            (3,), or anything reshapeable to (-1, 3).

    Returns:
        On rank 0: a list with one matrix per k-point, or that single matrix
        when ``kpts`` is None or has shape (3,).  On other ranks the local,
        un-reduced (nkpts, nao, nao) array is returned as is.

    Side effects:
        Writes ``AS_ECPBAS_OFFSET`` and ``AS_NECPBAS`` into ``cell._env`` in
        place; ``ecpcell._env`` refers to the same ``_env`` object.
    '''
    if rank == 0:
        comm.bcast(cell.dumps())
    else:
        cell = pgto.loads(comm.bcast(None))

    if kpts is None:
        kpts_lst = numpy.zeros((1,3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1,3))

    ecpcell = gto.Mole()
    ecpcell._atm = cell._atm
    # append a fictitious s function to mimic the auxiliary index in pbc.incore.
    # ptr2last_env_idx to force PBCnr3c_fill_* function to copy the entire "env"
    ptr2last_env_idx = len(cell._env) - 1
    ecpbas = numpy.vstack([[0, 0, 1, 1, 0, ptr2last_env_idx, 0, 0],
                           cell._ecpbas]).astype(numpy.int32)
    ecpcell._bas = ecpbas
    ecpcell._env = cell._env
    # In pbc.incore _ecpbas is appended to two sets of cell._bas and the
    # fictitious s function.
    cell._env[AS_ECPBAS_OFFSET] = cell.nbas * 2 + 1
    cell._env[AS_NECPBAS] = len(cell._ecpbas)

    # Diagonal (k, k) pairs only.
    kptij_lst = numpy.hstack((kpts_lst,kpts_lst)).reshape(-1,2,3)
    nkpts = len(kpts_lst)
    if abs(kpts_lst).sum() < 1e-9:  # gamma_point
        dtype = numpy.double
    else:
        dtype = numpy.complex128
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    mat = numpy.zeros((nkpts,nao,nao), dtype=dtype)

    intor = cell._add_suffix('ECPscalar')
    int3c = incore.wrap_int3c(cell, ecpcell, intor, kptij_lst=kptij_lst)

    # shls_slice of auxiliary index (0,1) corresponds to the fictitious s function
    tasks = [(i, i+1, j, j+1, 0, 1)  # shls_slice
             for i in range(cell.nbas) for j in range(i+1)]
    # Only shell pairs with j <= i are evaluated; each task fills one block.
    for shls_slice in mpi.work_stealing_partition(tasks):
        i0 = ao_loc[shls_slice[0]]
        i1 = ao_loc[shls_slice[1]]
        j0 = ao_loc[shls_slice[2]]
        j1 = ao_loc[shls_slice[3]]
        buf = numpy.empty((nkpts,i1-i0,j1-j0), dtype=dtype)
        mat[:,i0:i1,j0:j1] = int3c(shls_slice, buf)

    buf = mpi.reduce(mat)
    if rank == 0:
        mat = []
        for k, kpt in enumerate(kpts_lst):
            # Rebuild the full matrix from its lower triangle.
            v = lib.unpack_tril(lib.pack_tril(buf[k]), lib.HERMITIAN)
            if abs(kpt).sum() < 1e-9:  # gamma_point:
                v = v.real
            mat.append(v)
        if kpts is None or numpy.shape(kpts) == (3,):
            mat = mat[0]
    return mat
def cosmo_fock_o1(cosmo, dm):
    '''Return ``cosmo.fepsi()`` times the unpacked contraction of the
    three-centre integrals with ``-cosmo.qcos[:cosmo.nps]``.

    Steps, in order: load the segments, evaluate ``cosmo.phi`` at the first
    ``cosmo.nps`` surface points (density part, then one term per atom), save
    the segments, call ``cosmo.charges()``, reload the segments, and contract
    the same integrals with the first ``cosmo.nps`` entries of ``cosmo.qcos``.
    '''
    mol = cosmo.mol
    nao = dm.shape[0]

    # --- phi ---
    cosmo.loadsegs()
    surface_pts = cosmo.cosurf[:cosmo.nps*3].reshape(-1,3)
    j3c = df.incore.aux_e2(mol, _make_fakemol(surface_pts),
                           intor='cint3c2e_sph', aosym='s2ij')
    # Packed density: off-diagonal entries doubled, diagonal left at weight 1.
    packed_dm = lib.pack_tril(dm) * 2
    diag = numpy.arange(nao)
    diag = diag*(diag+1)//2 + diag
    packed_dm[diag] *= .5
    cosmo.phi = -numpy.einsum('x,xk->k', packed_dm, j3c)
    for atom_id in range(mol.natm):
        cosmo.phi += mol.atom_charge(atom_id)/lib.norm(mol.atom_coord(atom_id)-surface_pts, axis=1)
    cosmo.savesegs()

    # --- qk ---
    cosmo.charges()

    # --- vpot ---
    # The integrals computed above are reused rather than recomputed.
    cosmo.loadsegs()
    packed_v = numpy.einsum('xk,k->x', j3c, -cosmo.qcos[:cosmo.nps])
    fock = lib.unpack_tril(packed_v)
    scale = cosmo.fepsi()
    return scale*fock
def part_eri_hermi(eri, norb, nimp):
    '''Symmetrise the integrals after zeroing rows ``nimp:`` of each pair matrix.

    ``eri`` is restored with symmetry flag 4.  For every row the packed pair
    matrix is unpacked, its rows from ``nimp`` on are zeroed, and the matrix
    plus its transpose is packed back.  The array is then passed through
    ``lib.transpose_sum`` in place, restored with flag 8 and scaled by 0.25.
    '''
    eri4 = ao2mo.restore(4, eri, norb)
    for packed_row in eri4:
        square = lib.unpack_tril(packed_row)
        square[nimp:] = 0
        packed_row[:] = lib.pack_tril(square + square.T)
    eri4 = lib.transpose_sum(eri4, inplace=True)
    eri8 = ao2mo.restore(8, eri4, norb)
    return eri8 * 0.25
def _make_Lpq(mydf, mol, auxmol):
    '''Build and return a closure ``get_Lpq(shls_slice, col0, col1, buf)``
    that evaluates three-centre integrals for a shell slice and solves them
    against the auxiliary-basis metric selected by ``mydf.metric``.

    The metric is chosen by ``mydf.metric.upper()``: 'S' (overlap), 'T'
    (kinetic integrals times 2), anything else is treated as 'J'
    (``incore.fill_2c2e``).  When ``mydf.charge_constraint`` is true, the
    metric is bordered by one extra row/column that is 1 on the l == 0
    auxiliary functions.
    '''
    atm, bas, env, ao_loc = incore._env_and_aoloc('cint3c1e_sph', mol, auxmol)
    nao = ao_loc[mol.nbas]
    naux = ao_loc[-1] - nao
    nao_pair = nao * (nao+1) // 2  # not used below

    if mydf.metric.upper() == 'S':
        intor = 'cint3c1e_sph'
        s_aux = auxmol.intor_symmetric('cint1e_ovlp_sph')
    elif mydf.metric.upper() == 'T':
        intor = 'cint3c1e_p2_sph'
        s_aux = auxmol.intor_symmetric('cint1e_kin_sph') * 2
    else:  # metric.upper() == 'J'
        intor = 'cint3c2e_sph'
        s_aux = incore.fill_2c2e(mol, auxmol)
    cintopt = gto.moleintor.make_cintopt(atm, bas, env, intor)

    if mydf.charge_constraint:
        ovlp = lib.pack_tril(mol.intor_symmetric('cint1e_ovlp_sph'))

        aux_loc = auxmol.ao_loc_nr()
        # Indices of all auxiliary functions that belong to l == 0 shells.
        s_index = numpy.hstack([range(aux_loc[i],aux_loc[i+1])
                                for i,l in enumerate(auxmol._bas[:,ANG_OF]) if l == 0])
        # Bordered metric: the last row/column couples to the s functions.
        a = numpy.zeros((naux+1,naux+1))
        a[:naux,:naux] = s_aux
        a[naux,s_index] = a[s_index,naux] = 1
        # Try Cholesky first; fall back to a general solve if it fails.
        try:
            cd = scipy.linalg.cho_factor(a)
            def solve(Lpq):
                return scipy.linalg.cho_solve(cd, Lpq)
        except scipy.linalg.LinAlgError:
            def solve(Lpq):
                return scipy.linalg.solve(a, Lpq)
    else:
        cd = scipy.linalg.cho_factor(s_aux)
        def solve(Lpq):
            return scipy.linalg.cho_solve(cd, Lpq, overwrite_b=True)

    def get_Lpq(shls_slice, col0, col1, buf):
        # Be cautious here, _ri.nr_auxe2 assumes buf in F-order
        Lpq = _ri.nr_auxe2(intor, atm, bas, env, shls_slice, ao_loc,
                           's2ij', 1, cintopt, buf).T
        if mydf.charge_constraint:
            # Re-view the same buffer with one extra row, which is filled
            # with the packed AO overlap for columns col0:col1.
            # NOTE(review): this relies on buf being large enough for
            # (naux+1)*(col1-col0) elements -- confirm with the caller.
            Lpq = numpy.ndarray(shape=(naux+1,col1-col0), buffer=buf)
            Lpq[naux,:] = ovlp[col0:col1]
            Lpq1 = solve(Lpq)
            assert(Lpq1.flags.f_contiguous)
            lib.transpose(Lpq1.T, out=Lpq)
            return Lpq[:naux]
        else:
            return solve(Lpq)
    return get_Lpq
def _int_nuc_vloc(mydf, nuccell, kpts, intor='int3c2e_sph', aosym='s2', comp=1):
    '''Vnuc - Vloc

    MPI variant: each process evaluates the three-centre integrals for its
    share of the first-index shells and the pieces are combined with
    ``mpi.allgather``.

    Args:
        mydf: object providing ``cell``.
        nuccell: cell holding the compensating charge functions; its
            ``_atm``/``_bas``/``_env`` are concatenated in front of those of
            the fake nuclei.
        kpts: k-points; only diagonal (k, k) pairs are used.
        intor: integral name passed to ``incore.aux_e2``.
        aosym: only consulted in the final overlap correction.  The call to
            ``incore.aux_e2`` always uses 's2'.
        comp: only checked by an assert in the overlap-correction branch.

    Returns:
        Complex array of shape (nkpts, nao*(nao+1)//2).
    '''
    cell = mydf.cell
    nkpts = len(kpts)

    # Use the 3c2e code with steep s gaussians to mimic nuclear density
    fakenuc = aft._fake_nuc(cell)
    fakenuc._atm, fakenuc._bas, fakenuc._env = \
            gto.conc_env(nuccell._atm, nuccell._bas, nuccell._env,
                         fakenuc._atm, fakenuc._bas, fakenuc._env)

    kptij_lst = numpy.hstack((kpts,kpts)).reshape(-1,2,3)
    # Shell i is given cost i+1 when the shells are split over processes.
    ishs = mpi.work_balanced_partition(numpy.arange(cell.nbas),
                                       costs=numpy.arange(1, cell.nbas+1))
    if len(ishs) > 0:
        ish0, ish1 = ishs[0], ishs[-1]+1
        buf = incore.aux_e2(cell, fakenuc, intor, aosym='s2', kptij_lst=kptij_lst,
                            shls_slice=(ish0,ish1,0,cell.nbas,0,fakenuc.nbas))
    else:
        # This process received no shells.
        buf = numpy.zeros(0)

    charge = cell.atom_charges()
    charge = numpy.append(charge, -charge)  # (charge-of-nuccell, charge-of-fakenuc)
    nao = cell.nao_nr()
    nchg = len(charge)
    nao_pair = nao*(nao+1)//2
    buf = buf.reshape(nkpts,-1,nchg)
    # scaled by 1./mpi.pool.size because nuc is mpi.reduced in get_nuc function
    buf = numpy.einsum('kxz,z->kx', buf, 1./mpi.pool.size*charge)
    mat = numpy.empty((nkpts,nao_pair), dtype=numpy.complex128)
    for k in range(nkpts):
        mat[k] = mpi.allgather(buf[k])

    # The overlap correction is added on rank 0 only.
    if (rank == 0 and cell.dimension == 3 and
        intor in ('int3c2e', 'int3c2e_sph', 'int3c2e_cart')):
        assert(comp == 1)
        charges = cell.atom_charges()

        nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charges)])
        nucbar *= numpy.pi/cell.vol
        ovlp = cell.pbc_intor('int1e_ovlp_sph', 1, lib.HERMITIAN, kpts)
        for k in range(nkpts):
            # NOTE(review): nao_pair is the packed-triangle length here, so
            # the 's1' reshape would only fit for nao <= 1 -- confirm whether
            # aosym='s1' is ever passed to this variant.
            if aosym == 's1':
                mat[k] += nucbar * ovlp[k].reshape(nao_pair)
            else:
                mat[k] += nucbar * lib.pack_tril(ovlp[k])
    return mat
def test_unpack(self):
    '''Round-trip checks for lib.hermi_triu, lib.pack_tril and lib.unpack_tril.'''
    def max_abs(arr):
        return abs(arr).max()

    n = 400
    a = numpy.random.random((n,n))
    a = a+a*.5j
    # Make the diagonal purely real.
    diag = numpy.arange(n)
    a[diag,diag] = a[diag,diag].real

    anti = a-a.T.conj()
    pair = numpy.array((anti,anti))

    # Anti-Hermitian input, passed once as a transposed view and once as is.
    filled = lib.hermi_triu(pair[0].T, hermi=2, inplace=0)
    self.assertAlmostEqual(max_abs(pair[0].T-filled), 0, 12)
    filled = lib.hermi_triu(pair[1], hermi=2, inplace=0)
    self.assertAlmostEqual(max_abs(pair[1]-filled), 0, 12)
    roundtrip = lib.unpack_tril(lib.pack_tril(filled), 2)
    self.assertAlmostEqual(max_abs(filled - roundtrip), 0, 12)

    # Hermitian fill-in of the general matrix.
    filled = lib.hermi_triu(a, hermi=1, inplace=0)
    self.assertAlmostEqual(max_abs(filled-filled.T.conj()), 0, 12)

    # Batched pack/unpack.
    stack = numpy.asarray((filled,filled,filled))
    roundtrip = lib.unpack_tril(lib.pack_tril(stack))
    self.assertAlmostEqual(max_abs(stack - roundtrip), 0, 12)

    # Fingerprint of an unpack along axis 0 with a fixed seed.
    numpy.random.seed(1)
    packed = numpy.random.random((5050,20))
    fingerprint = lib.finger(lib.unpack_tril(packed, axis=0))
    self.assertAlmostEqual(fingerprint, -103.03970592075423, 10)
def contract_1e(f1e, fcivec, norb, nelec, link_index=None):
    '''Contract a one-electron operator with a CI vector via
    ``libfci.FCIcontract_1e_spin0`` and symmetrise the result.

    The kernel output is passed through ``lib.transpose_sum`` in place and
    reshaped to the shape of the C-ordered input vector.
    '''
    civec = numpy.asarray(fcivec, order='C')
    lnk = _unpack(norb, nelec, link_index)
    na, nlink = lnk.shape[:2]
    assert(civec.size == na**2)
    work = numpy.empty_like(civec)
    h1_packed = lib.pack_tril(f1e)

    def _ptr(arr):
        # Raw data pointer handed to the C kernel.
        return arr.ctypes.data_as(ctypes.c_void_p)

    libfci.FCIcontract_1e_spin0(_ptr(h1_packed), _ptr(civec), _ptr(work),
                                ctypes.c_int(norb), ctypes.c_int(na),
                                ctypes.c_int(nlink), _ptr(lnk))
    # No factor of 0.5 after symmetrising: per the original note, the spin0
    # kernel only computes half of the contraction.
    symmetrised = lib.transpose_sum(work, inplace=True)
    return symmetrised.reshape(civec.shape)
def _int_nuc_vloc(mydf, nuccell, kpts, intor='int3c2e', aosym='s2', comp=1):
    '''Vnuc - Vloc

    Three-centre integrals over ``nuccell`` plus the fake nuclei are
    contracted with the charges (+Z for ``nuccell``, -Z for the fake nuclei).
    For 3D cells and the plain ``int3c2e`` integrals, the ``vbar`` overlap
    correction is applied as well.
    '''
    cell = mydf.cell
    nkpts = len(kpts)

    # Steep s gaussians stand in for the nuclear density so that the 3c2e
    # code can be reused.
    fakenuc = _fake_nuc(cell)
    merged = gto.conc_env(nuccell._atm, nuccell._bas, nuccell._env,
                          fakenuc._atm, fakenuc._bas, fakenuc._env)
    fakenuc._atm, fakenuc._bas, fakenuc._env = merged

    kptij_lst = numpy.hstack((kpts,kpts)).reshape(-1,2,3)
    buf = incore.aux_e2(cell, fakenuc, intor, aosym=aosym, comp=comp,
                        kptij_lst=kptij_lst)

    charge = cell.atom_charges()
    charge = numpy.append(charge, -charge)  # (nuccell charges, fakenuc charges)
    nao = cell.nao_nr()
    nchg = len(charge)
    nao_pair = nao**2 if aosym == 's1' else nao*(nao+1)//2

    if comp == 1:
        mat = numpy.einsum('kxz,z->kx',
                           buf.reshape(nkpts,nao_pair,nchg), charge)
    else:
        mat = numpy.einsum('kcxz,z->kcx',
                           buf.reshape(nkpts,comp,nao_pair,nchg), charge)

    # vbar is the interaction between the background charge and the
    # compensating function.  0D, 1D, 2D do not have vbar.
    if cell.dimension != 3 or intor not in ('int3c2e', 'int3c2e_sph',
                                            'int3c2e_cart'):
        return mat

    assert(comp == 1)
    charge = -cell.atom_charges()
    nucbar = sum(z/nuccell.bas_exp(i)[0] for i, z in enumerate(charge))
    nucbar *= numpy.pi/cell.vol

    ovlp = cell.pbc_intor('int1e_ovlp', 1, lib.HERMITIAN, kpts)
    for k in range(nkpts):
        if aosym == 's1':
            s_flat = ovlp[k].reshape(nao_pair)
        else:
            s_flat = lib.pack_tril(ovlp[k])
        mat[k] -= nucbar * s_flat
    return mat
def incore(eri, dm, hermi=0):
    '''Call the C in-core J/K driver for one density matrix.

    The integral layout is detected from the shape of ``eri``: a 2D array of
    npair*npair elements selects the 4-fold driver, a 1D array of
    npair*(npair+1)//2 elements selects the 8-fold driver, where
    npair = nao*(nao+1)//2.

    Args:
        eri: real integrals in one of the two layouts above.
        dm: density matrix; ``dm.shape[0]`` is taken as nao.
        hermi: passed to ``lib.hermi_triu`` for both outputs when non-zero.
            With 0, only vj is completed (with flag 1).  ``hermi == 1`` also
            selects the 's2il' K kernel in the 8-fold branch.

    Returns:
        (vj, vk)

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    '''
    assert(not numpy.iscomplexobj(eri))
    eri = numpy.ascontiguousarray(eri)
    dm = numpy.ascontiguousarray(dm)
    nao = dm.shape[0]
    vj = numpy.empty((nao,nao))
    vk = numpy.empty((nao,nao))
    npair = nao*(nao+1)//2

    fourfold = eri.ndim == 2 and npair*npair == eri.size
    eightfold = eri.ndim == 1 and npair*(npair+1)//2 == eri.size
    if not (fourfold or eightfold):
        raise RuntimeError('Array shape not consistent: DM %s, eri %s'
                           % (dm.shape, eri.shape))

    if fourfold:
        fdrv = getattr(libcvhf, 'CVHFnrs4_incore_drv')
        # J: 'ijkl,kl->ij'   K: 'ijkl,il->jk'
        # (The original notes 'CVHFics4_ij_s2kl' for 'ijkl,ij->kl' and
        # 'CVHFics4_jk_s1il' for 'ijkl,jk->il' as alternatives.)
        fvj = _fpointer('CVHFics4_kl_s2ij')
        fvk = _fpointer('CVHFics4_il_s1jk')
        tridm = dm
    else:
        fdrv = getattr(libcvhf, 'CVHFnrs8_incore_drv')
        fvj = _fpointer('CVHFics8_tridm_vj')
        if hermi == 1:
            fvk = _fpointer('CVHFics8_jk_s2il')
        else:
            fvk = _fpointer('CVHFics8_jk_s1il')
        # Packed dm + dm.T with the diagonal halved back to a single weight.
        tridm = lib.pack_tril(lib.transpose_sum(dm))
        diag = numpy.arange(nao)
        tridm[diag*(diag+1)//2+diag] *= .5

    def _ptr(arr):
        # Raw data pointer handed to the C driver.
        return arr.ctypes.data_as(ctypes.c_void_p)

    fdrv(_ptr(eri), _ptr(tridm), _ptr(vj), _ptr(dm), _ptr(vk),
         ctypes.c_int(nao), fvj, fvk)

    if hermi == 0:
        vj = lib.hermi_triu(vj, 1)
    else:
        vj = lib.hermi_triu(vj, hermi)
        vk = lib.hermi_triu(vk, hermi)
    return vj, vk
def make_phi(pcmobj, dm, r_vdw, ui):
    '''Project the potential on the atomic spheres onto real spherical
    harmonics.

    The potential at each sphere point is the nuclear term minus, on the
    points where ``ui > 0``, the density term obtained from three-centre
    integrals.  The result is contracted with the one-sphere weights, the
    harmonics and ``ui``, with an overall minus sign.

    Args:
        pcmobj: solvent object; ``mol``, ``lebedev_order``, ``lmax`` and
            ``max_memory`` are read.
        dm: a 2D ndarray, or anything else whose items 0 and 1 are summed.
        r_vdw: per-atom radii scaling the one-sphere grid.
        ui: per-atom, per-grid-point array; points with ``ui > 0`` receive
            the density contribution.
    '''
    mol = pcmobj.mol
    natm = mol.natm
    sph_coords, sph_weights = make_grids_one_sphere(pcmobj.lebedev_order)
    npts_sph = sph_coords.shape[0]

    is_single_dm = isinstance(dm, numpy.ndarray) and dm.ndim == 2
    if not is_single_dm:
        dm = dm[0] + dm[1]
    # Packed dm + dm.T with the diagonal halved back to a single weight.
    packed_dm = lib.pack_tril(dm+dm.T)
    nao = dm.shape[0]
    diag = numpy.arange(nao)
    diag = diag*(diag+1)//2 + diag
    packed_dm[diag] *= .5

    atom_coords = mol.atom_coords()
    atom_charges = mol.atom_charges()

    outside = ui > 0
    cav_coords = (atom_coords.reshape(natm,1,3) +
                  numpy.einsum('r,gx->rgx', r_vdw, sph_coords))

    v_phi = numpy.empty((natm,npts_sph))
    for ia in range(natm):
        # Note (-) sign is not applied to atom_charges, because (-) is
        # explicitly included in rhs and L matrix
        d_rs = atom_coords.reshape(-1,1,3) - cav_coords[ia]
        v_phi[ia] = numpy.einsum('z,zp->p', atom_charges, 1./lib.norm(d_rs,axis=2))

    max_memory = pcmobj.max_memory - lib.current_memory()[0]
    blksize = int(max(max_memory*1e6/8/nao**2, 400))

    # Density contribution, evaluated in blocks of selected points.
    ext_coords = cav_coords[outside]
    npts_ext = ext_coords.shape[0]
    v_dens = numpy.empty(npts_ext)
    int3c2e = mol._add_suffix('int3c2e')
    for p0, p1 in lib.prange(0, npts_ext, blksize):
        fakemol = gto.fakemol_for_charges(ext_coords[p0:p1])
        v_nj = df.incore.aux_e2(mol, fakemol, intor=int3c2e, aosym='s2ij')
        v_dens[p0:p1] = numpy.einsum('x,xk->k', packed_dm, v_nj)
    v_phi[outside] -= v_dens

    ylm_1sph = numpy.vstack(sph.real_sph_vec(sph_coords, pcmobj.lmax, True))
    return -numpy.einsum('n,xn,jn,jn->jx', sph_weights, ylm_1sph, ui, v_phi)
def cosmo_occ_o1(cosmo, dm):
    '''Evaluate ``cosmo.phio`` on the ``cosmo.npspher`` surface points stored
    after the first ``cosmo.nps`` points, then return ``cosmo.occ1()``.

    Steps, in order: ``cosmo.occ0()``, load the segments, evaluate the
    density part and one term per atom into ``cosmo.phio``, save the
    segments, and call ``cosmo.occ1()``.
    '''
    mol = cosmo.mol
    nao = dm.shape[0]

    cosmo.occ0()
    cosmo.loadsegs()

    # These points start right after the 3*nps coordinates of the first set.
    start = 3*cosmo.nps
    outer_pts = cosmo.cosurf[start:start+cosmo.npspher*3].reshape(-1,3)
    j3c = df.incore.aux_e2(mol, _make_fakemol(outer_pts),
                           intor='cint3c2e_sph', aosym='s2ij')

    # Packed density: off-diagonal entries doubled, diagonal left at weight 1.
    packed_dm = lib.pack_tril(dm) * 2
    diag = numpy.arange(nao)
    diag = diag*(diag+1)//2 + diag
    packed_dm[diag] *= .5

    cosmo.phio = -numpy.einsum('x,xk->k', packed_dm, j3c)
    for atom_id in range(mol.natm):
        cosmo.phio += mol.atom_charge(atom_id)/lib.norm(mol.atom_coord(atom_id)-outer_pts, axis=1)
    cosmo.savesegs()
    return cosmo.occ1()
def _int_nuc_vloc(mydf, nuccell, kpts, intor='cint3c2e_sph'):
    '''Vnuc - Vloc

    Explicit lattice-sum variant: for every lattice vector the atom
    coordinates in the integral environment are shifted and the wrapped
    three-centre integral routine accumulates into one buffer per k-point.

    Args:
        mydf: object providing ``cell``.
        nuccell: cell holding the compensating charge functions.
        kpts: array of k-points (``kpts.T`` is used, so a 2D ndarray is
            expected).
        intor: integral name passed to ``incore._wrap_int3c``.

    Returns:
        List with one lower-triangular packed array per k-point.
    '''
    cell = mydf.cell
    rcut = max(cell.rcut, nuccell.rcut)
    Ls = cell.get_lattice_Ls(rcut=rcut)
    # Phase factors exp(i L.k) for every lattice vector and k-point.
    expLk = numpy.asarray(numpy.exp(1j*numpy.dot(Ls, kpts.T)), order='C')
    nkpts = len(kpts)

    # Use the 3c2e code with steep s gaussians to mimic nuclear density
    fakenuc = _fake_nuc(cell)
    fakenuc._atm, fakenuc._bas, fakenuc._env = \
            gto.conc_env(nuccell._atm, nuccell._bas, nuccell._env,
                         fakenuc._atm, fakenuc._bas, fakenuc._env)

    nao = cell.nao_nr()
    # One F-ordered complex accumulator per k-point, passed to the wrapper.
    buf = [numpy.zeros((nao,nao,fakenuc.natm), order='F', dtype=numpy.complex128)
           for k in range(nkpts)]
    ints = incore._wrap_int3c(cell, fakenuc, intor, 1, Ls, buf)
    atm, bas, env = ints._envs[:3]
    c_shls_slice = (ctypes.c_int*6)(0, cell.nbas, cell.nbas, cell.nbas*2,
                                    cell.nbas*2, cell.nbas*2+fakenuc.natm)

    xyz = numpy.asarray(cell.atom_coords(), order='C')
    # Positions in env of the x, y, z coordinates of the first cell.natm atoms.
    ptr_coordL = atm[:cell.natm,gto.PTR_COORD]
    ptr_coordL = numpy.vstack((ptr_coordL,ptr_coordL+1,ptr_coordL+2)).T.copy('C')
    for l, L1 in enumerate(Ls):
        # Shift those atoms by the current lattice vector, in place in env.
        env[ptr_coordL] = xyz + L1
        exp_Lk = numpy.einsum('k,ik->ik', expLk[l].conj(), expLk[:l+1])
        exp_Lk = numpy.asarray(exp_Lk, order='C')
        # The l == i term gets weight 0.5; only images 0..l are passed, and
        # the result is completed with v + v^H below.
        exp_Lk[l] = .5
        ints(exp_Lk, c_shls_slice)

    charge = cell.atom_charges()
    charge = numpy.append(charge, -charge)  # (charge-of-nuccell, charge-of-fakenuc)
    for k, kpt in enumerate(kpts):
        v = numpy.einsum('ijz,z->ij', buf[k], charge)
        buf[k] = lib.pack_tril(v + v.T.conj())
    return buf
def get_pnucp(mydf, kpts=None):
    '''Accumulate the 'pnucp' matrix for each k-point.

    With ``mydf.eta == 0`` the whole matrix comes from the reciprocal-space
    loop.  Otherwise an analytic part from ``mydf._int_nuc_vloc`` with the
    'cint3c2e_pvp1_sph' integrals is computed first, and, for 3D cells, a
    correction built from the 'cint1e_kin_sph' integrals is subtracted.

    Args:
        mydf: density-fitting object; ``cell``, ``gs``, ``eta``,
            ``max_memory``, ``stdout`` and ``verbose`` are read.
        kpts: None (a single zero k-point), one k-point of shape (3,), or
            anything reshapeable to (-1, 3).

    Returns:
        A list with one unpacked matrix per k-point, or that single matrix
        when ``kpts`` is None or has shape (3,).
    '''
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1,3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1,3))

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8.  Prefer the logger's own clocks
    # when it provides them, then the legacy time.clock() where it still
    # exists (unchanged behaviour on old interpreters), then process_time().
    cpu_clock = (getattr(logger, 'process_clock', None)
                 or getattr(time, 'clock', None)
                 or time.process_time)
    wall_clock = getattr(logger, 'perf_counter', None) or time.time
    t1 = t0 = (cpu_clock(), wall_clock())

    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2

    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs)
    kpt_allow = numpy.zeros(3)
    if mydf.eta == 0:
        charge = -cell.atom_charges()
        #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j)
        SI = cell.get_SI(Gv)
        vGR = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.real) * kws
        vGI = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.imag) * kws
        wjR = numpy.zeros((nkpts,nao_pair))
        wjI = numpy.zeros((nkpts,nao_pair))
    else:
        # One s-type function per atom with exponent eta, appended to a
        # shallow copy of the cell.
        nuccell = copy.copy(cell)
        half_sph_norm = .5/numpy.sqrt(numpy.pi)
        norm = half_sph_norm/mole._gaussian_int(2, mydf.eta)
        chg_env = [mydf.eta, norm]
        ptr_eta = cell._env.size
        ptr_norm = ptr_eta + 1
        chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0] for ia in range(cell.natm)]
        nuccell._atm = cell._atm
        nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32)
        nuccell._env = numpy.hstack((cell._env, chg_env))

        wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst, 'cint3c2e_pvp1_sph'))
        wjR = wj.real
        wjI = wj.imag
        t1 = log.timer_debug1('pnucp pass1: analytic int', *t1)

        charge = -cell.atom_charges()
        #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j)
        aoaux = ft_ao.ft_ao(nuccell, Gv)
        vGR = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.real) * kws
        vGI = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.imag) * kws

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst,
                            max_memory=max_memory, aosym='s2'):
        # rho_ij(G) nuc(-G) / G^2
        # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2
        # The imaginary part is skipped at the gamma point.
        if not pwdf_jk.gamma_point(kpts_lst[k]):
            wjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI)
            wjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR)
        wjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR)
        wjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI)
    t1 = log.timer_debug1('contracting Vnuc', *t1)

    # nuccell only exists on the eta != 0 path, hence the first condition.
    if mydf.eta != 0 and cell.dimension == 3:
        nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)])
        nucbar *= numpy.pi/cell.vol * 2
        ovlp = cell.pbc_intor('cint1e_kin_sph', 1, lib.HERMITIAN, kpts_lst)
        for k in range(nkpts):
            s = lib.pack_tril(ovlp[k])
            wjR[k] -= nucbar * s.real
            wjI[k] -= nucbar * s.imag

    wj = []
    for k, kpt in enumerate(kpts_lst):
        if pwdf_jk.gamma_point(kpt):
            wj.append(lib.unpack_tril(wjR[k]))
        else:
            wj.append(lib.unpack_tril(wjR[k]+wjI[k]*1j))

    if kpts is None or numpy.shape(kpts) == (3,):
        wj = wj[0]
    return wj
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Build and store the 'j3c' datasets for all k-point pairs whose
    difference is ``uniq_kpts[uniq_kptji_id]``.

    For every AO shell range the intermediate 'j3c-junk' data are read from
    ``fswap``, a Fourier-space term is added by ``pw_contract`` (run through
    ``lib.call_in_background``), the result is combined with the decomposed
    two-centre metric, and the datasets are written to ``feri``.  The
    'j3c-junk' groups of the processed pairs are deleted at the end.

    Args:
        uniq_kptji_id: index into ``uniq_kpts``.
        cholesky_j2c: tuple ``(j2c, j2c_negative, j2ctag)``.  With tag 'CD'
            ``j2c`` is used in a lower-triangular solve, otherwise it is
            applied by matrix multiplication.  ``j2c_negative`` may be None.

    NOTE(review): this is a closure.  ``uniq_kpts``, ``uniq_inverse``,
    ``kptjs``, ``auxcell``, ``fused_cell``, ``Gv``, ``b``, ``gxyz``,
    ``Gvbase``, ``mesh``, ``mydf``, ``cell``, ``nao``, ``naux``, ``ngrids``,
    ``nsegs``, ``fuse``, ``fswap``, ``feri`` and ``log`` come from the
    enclosing scope, which is not visible here.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    Gaux *= wcoulG.reshape(-1,1)
    # Separate C-contiguous real and imaginary parts for the real dot calls.
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    Gaux = None

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao*(nao+1)//2

        if cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory-mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
    else:
        Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    # Scratch buffers reused by every pw_contract call.
    pqkRbuf = numpy.empty(buflen*Gblksize)
    pqkIbuf = numpy.empty(buflen*Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
    def pw_contract(istep, sh_range, j3cR, j3cI):
        # Subtract the Fourier-space term from rows naux: of j3cR/j3cI for
        # one AO shell range, then write the resulting datasets to feri.
        bstart, bend, ncol = sh_range
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                # j3cI[k] is None for the gamma-point pair, so it is skipped.
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                feri['j3c/%d/%d'%(ji,istep)] = v
            else:
                feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

    with lib.call_in_background(pw_contract) as compute:
        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                # Gather the columns of this shell range from every segment.
                v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                  for i in range(nsegs)])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
            compute(istep, sh_range, j3cR, j3cI)

    for ji in adapted_ji_idx:
        del(fswap['j3c-junk/%d'%ji])
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Nuclear gradients assembled from the core and active-space density
    matrices of ``mc``.

    Args:
        mc: multi-configurational object; ``mo_coeff``, ``ci``, ``_scf``,
            ``frozen``, ``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver`` and ``max_memory`` are read.
        mo_coeff: orbital coefficients (default ``mc.mo_coeff``).
        ci: CI vector (default ``mc.ci``).
        atmlst: atom indices to differentiate (default: all atoms).
        mf_grad: mean-field gradient object (default
            ``mc._scf.nuc_grad_method()``).
        verbose: accepted but not used in this function.

    Returns:
        Array of shape (len(atmlst), 3).

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2

    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nocc,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active columns are overwritten with the 1-RDM-weighted core Fock
    # term plus the 2-RDM contraction.
    gfock[:,ncore:nocc] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:,ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    # Symmetrised generalized Fock in the AO basis; contracted with the
    # overlap derivative below.
    dme0 = reduce(numpy.dot, (mo_occ, (gfock+gfock.T)*.5, mo_occ.T))
    # Drop references to intermediates that are no longer needed.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements in lower-triangular packed storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    # Half-transform the symmetrised 2-RDM, then pack its last two indices;
    # the diagonal is halved after packing.
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    # Block size for the second AO index, derived from the remaining memory
    # and the largest per-atom AO count; clamped to [2, nao].
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        # q0:q1 tracks the AO range of the current shell block.
        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de
def dm_for_vj_tril(dm):
    '''Return ``dm + dm^H`` in lower-triangular packed form, with the entries
    at positions ``i*(i+1)//2 + i`` scaled by one half.

    ``i`` is not defined in this function; it is taken from the enclosing
    scope.
    '''
    packed = lib.pack_tril(dm + dm.T.conj())
    diag_pos = i * (i + 1) // 2 + i
    packed[diag_pos] *= .5
    return packed
def get_jk(mol_or_mf, dm, hermi=1): '''MPI version of scf.hf.get_jk function''' #vj = get_j(mol_or_mf, dm, hermi) #vk = get_k(mol_or_mf, dm, hermi) if isinstance(mol_or_mf, gto.mole.Mole): mf = hf.SCF(mol_or_mf).view(SCF) else: mf = mol_or_mf # dm may be too big for mpi4py library to serialize. Broadcast dm here. if any(comm.allgather(isinstance(dm, str) and dm == 'SKIPPED_ARG')): dm = mpi.bcast_tagged_array_occdf(dm) mf.unpack_(comm.bcast(mf.pack())) # initial and final grids level grdlvl_i = 0 grdlvl_f = 1 # norm_ddm threshold for grids change thrd_nddm = 0.03 # set block size to adapt memory sblk = 200 global cond, wao_vx, ngridsx, coordsx, gthrd, dm0 dms = numpy.asarray(dm) dm_shape = dms.shape nao = dm_shape[-1] dms = dms.reshape(-1, nao, nao) nset = dms.shape[0] vj = [0] * nset vk = [0] * nset # DF-J set mf.with_df = mf mol = mf.mol global int2c # use mf.opt to calc int2c once, cond, dm0 if mf.opt is None: mf.opt = mf.init_direct_scf() cond = 0 dm0 = numpy.zeros((nset, nao, nao)) # set auxbasis in input file, need self.auxbasis = None in __init__ of hf.py # mf.auxbasis = 'weigend' auxbasis = mf.auxbasis auxbasis = comm.bcast(auxbasis) mf.auxbasis = comm.bcast(mf.auxbasis) auxmol = df.addons.make_auxmol(mol, auxbasis) # (P|Q) int2c = auxmol.intor('int2c2e', aosym='s1', comp=1) if rank == 0: print('auxmol.basis', auxmol.basis) # coase and fine grids change norm_ddm = 0 for k in range(nset): norm_ddm += numpy.linalg.norm(dms[k] - dm0[k]) dm0 = dms if norm_ddm < thrd_nddm and cond == 2: cond = 1 if cond == 0: wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_i) if rank == 0: print('grids level at first is', grdlvl_i) cond = 2 elif cond == 1: wao_vx, ngridsx, coordsx, gthrd = get_gridss(mol, grdlvl_f) if rank == 0: print('grids level change to', grdlvl_f) cond = 3 # DF-J dmtril = [] for k in range(nset): dmtril.append(lib.pack_tril(dms[k] + dms[k].T)) i = numpy.arange(nao) dmtril[k][i * (i + 1) // 2 + i] *= .5 rho = [] b0 = 0 for eri1 in loop(mf.with_df): 
naux, nao_pair = eri1.shape # if rank==0: print('slice-naux',naux,'rank',rank) b1 = b0 + naux assert (nao_pair == nao * (nao + 1) // 2) for k in range(nset): if b0 == 0: rho.append(numpy.empty(paux[rank])) rho[k][b0:b1] = numpy.dot(eri1, dmtril[k]) b0 = b1 orho = [] rec = [] for k in range(nset): orho.append(mpi.gather(rho[k])) if rank == 0: ivj0 = scipy.linalg.solve(int2c, orho[k]) else: ivj0 = None rec.append(numpy.empty(paux[rank])) comm.Scatterv([ivj0, paux], rec[k], root=0) b0 = 0 for eri1 in loop(mf.with_df): naux, nao_pair = eri1.shape b1 = b0 + naux assert (nao_pair == nao * (nao + 1) // 2) for k in range(nset): vj[k] += numpy.dot(rec[k][b0:b1].T, eri1) b0 = b1 for k in range(nset): vj[k] = comm.reduce(vj[k]) # sgX for k in range(nset): # screening from Fg fg = numpy.dot(wao_vx, dms[k]) sngds = [] ss = 0 for i in range(ngridsx): if numpy.amax(numpy.absolute(fg[i, :])) < gthrd: sngds.append(i) ss += 1 if ss < ngridsx: wao_v = numpy.delete(wao_vx, sngds, 0) fg = numpy.delete(fg, sngds, 0) coords = numpy.delete(coordsx, sngds, 0) else: wao_v = wao_vx coords = coordsx # Kuv = Sum(Xug Avt Dkt Xkg) ngrids = coords.shape[0] blksize = min(ngrids, sblk) for i0, i1 in lib.prange(0, ngrids, blksize): bn = batch_nuc(mol, coords[i0:i1]) gbn = bn.swapaxes(0, 2) gv = lib.einsum('gvt,gt->gv', gbn, fg[i0:i1]) vk[k] += lib.einsum('gu,gv->uv', wao_v[i0:i1], gv) sn = lib.einsum('gu,gv->uv', wao_v, wao_v) vk[k] = comm.reduce(vk[k]) sn = comm.reduce(sn) # SSn^-1 for grids to analitic if rank == 0: snsgk = scipy.linalg.solve(sn, vk[k]) ovlp = mol.intor_symmetric('int1e_ovlp') vk[k] = numpy.matmul(ovlp, snsgk) if rank == 0: vj = lib.unpack_tril(numpy.asarray(vj), 1).reshape(dm_shape) vk = numpy.asarray(vk).reshape(dm_shape) return vj, vk
def save_vir_frac(p0, p1, eri):
    '''Store rows ``p0:p1`` of an integral slab into ``eris``.

    ``eri`` is viewed as ``(p1-p0, nocc, nmo, nmo)`` and its last two axes
    are split at ``nocc``: the leading/leading part goes to ``eris.vooo``,
    the leading/trailing part to ``eris.voov``, and the trailing/trailing
    part is packed to lower-triangular form and stored in ``eris.vovv``.

    ``nocc``, ``nmo``, ``nvir``, ``nvir_pair``, ``eris`` and ``_cp`` are
    taken from the enclosing scope.
    '''
    nrow = p1 - p0
    rows = slice(p0, p1)
    blk = eri.reshape(nrow, nocc, nmo, nmo)

    eris.vooo[rows] = blk[:, :, :nocc, :nocc]
    eris.voov[rows] = blk[:, :, :nocc, nocc:]

    # Flatten the first two axes before packing the trailing square block.
    vv_square = _cp(blk[:, :, nocc:, nocc:].reshape(nrow * nocc, nvir, nvir))
    vv_packed = lib.pack_tril(vv_square)
    eris.vovv[rows] = vv_packed.reshape(nrow, nocc, nvir_pair)
def incore(eri, dms, hermi=0, with_j=True, with_k=True):
    '''Contract in-memory two-electron integrals with density matrices.

    Args:
        eri: double-precision integral array.  A 2-D array with
            ``npair*npair`` elements is treated as 4-fold symmetric; a 1-D
            array with ``npair*(npair+1)//2`` elements as 8-fold symmetric
            (``npair = nao*(nao+1)//2``).
        dms: one density matrix or a stack of them; the last axis length
            is ``nao``.
        hermi: selects the K kernel for 8-fold integrals (``1`` uses the
            ``s2il`` kernel) and, when non-zero, is passed to
            ``lib.hermi_triu`` to fill the upper triangle of K.
        with_j, with_k: whether to build J / K.

    Returns:
        ``(vj, vk)`` reshaped like ``dms``; an entry is ``None`` when the
        corresponding ``with_*`` flag is false.

    Raises:
        RuntimeError: if the size of ``eri`` matches neither layout.
    '''
    assert (eri.dtype == numpy.double)
    eri = numpy.asarray(eri, order='C')
    dms = numpy.asarray(dms, order='C')
    dms_shape = dms.shape
    nao = dms_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    vj = numpy.zeros((n_dm, nao, nao)) if with_j else None
    vk = numpy.zeros((n_dm, nao, nao)) if with_k else None

    # Three parallel lists: entry i is (input pointer, output pointer,
    # kernel) for the i-th contraction handed to the C driver.
    src_ptrs = []
    dst_ptrs = []
    kernels = []

    def _enqueue(sources, targets, kernel):
        # Register one contraction per (source, target) pair.
        for src, dst in zip(sources, targets):
            src_ptrs.append(src.ctypes.data_as(ctypes.c_void_p))
            dst_ptrs.append(dst.ctypes.data_as(ctypes.c_void_p))
            kernels.append(kernel)

    npair = nao * (nao + 1) // 2
    if eri.ndim == 2 and npair * npair == eri.size:
        # 4-fold symmetry eri
        fdrv = libcvhf.CVHFnrs4_incore_drv
        if with_j:
            # 'ijkl,kl->ij'  (alternative kernel: CVHFics4_ij_s2kl)
            _enqueue(dms, vj, _fpointer('CVHFics4_kl_s2ij'))
        if with_k:
            # 'ijkl,il->jk'  (alternative kernel: CVHFics4_jk_s1il)
            _enqueue(dms, vk, _fpointer('CVHFics4_il_s1jk'))
    elif eri.ndim == 1 and npair * (npair + 1) // 2 == eri.size:
        # 8-fold symmetry eri
        fdrv = libcvhf.CVHFnrs8_incore_drv
        if with_j:
            fvj = _fpointer('CVHFics8_tridm_vj')
            # The J kernel takes the packed lower triangle of dm + dm^T
            # with the diagonal halved.
            tridms = lib.pack_tril(lib.hermi_sum(dms, axes=(0, 2, 1)))
            diag = numpy.arange(nao)
            tridms[:, diag * (diag + 1) // 2 + diag] *= .5
            _enqueue(tridms, vj, fvj)
        if with_k:
            kname = 'CVHFics8_jk_s2il' if hermi == 1 else 'CVHFics8_jk_s1il'
            _enqueue(dms, vk, _fpointer(kname))
    else:
        raise RuntimeError('Array shape not consistent: DM %s, eri %s' %
                           (dms_shape, eri.shape))

    n_ops = len(src_ptrs)
    ptr_array = ctypes.c_void_p * n_ops
    fdrv(eri.ctypes.data_as(ctypes.c_void_p),
         ptr_array(*src_ptrs), ptr_array(*dst_ptrs),
         ctypes.c_int(n_ops), ctypes.c_int(nao),
         ptr_array(*kernels))

    if with_j:
        for mat in vj:
            lib.hermi_triu(mat, 1, inplace=True)
        vj = vj.reshape(dms_shape)
    if with_k:
        if hermi != 0:
            for mat in vk:
                lib.hermi_triu(mat, hermi, inplace=True)
        vk = vk.reshape(dms_shape)
    return vj, vk
def get_pnucp(mydf, kpts=None):
    '''Build the "pnucp" matrix for each requested k-point.

    The analytic part comes from ``mydf._int_nuc_vloc`` with the
    ``int3c2e_pvp1_sph`` integral and the reciprocal-space part from
    ``mydf.ft_loop`` with the ``GTO_ft_pdotp_sph`` kernel.  When
    ``mydf.eta`` is zero only the reciprocal-space part is evaluated.

    Args:
        mydf: density-fitting object providing ``cell``, ``gs``, ``eta``,
            ``max_memory``, ``ft_loop`` and ``_int_nuc_vloc``.
        kpts: ``None`` (a single zero k-point is used), one k-point of
            shape ``(3,)``, or an array reshapeable to ``(-1, 3)``.

    Returns:
        ndarray of unpacked square matrices, one per k-point; a single
        matrix when ``kpts`` is ``None`` or has shape ``(3,)``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clock pair
    # so the values match what log.timer_debug1 subtracts from.
    t1 = (logger.process_clock(), logger.perf_counter())

    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2

    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs)
    charge = -cell.atom_charges()
    kpt_allow = numpy.zeros(3)
    coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs, Gv=Gv)
    coulG *= kws
    if mydf.eta == 0:
        wj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128)
        SI = cell.get_SI(Gv)
        vG = numpy.einsum('i,ix->x', charge, SI) * coulG
    else:
        # Auxiliary cell with one s-type Gaussian (exponent eta) per atom.
        nuccell = copy.copy(cell)
        half_sph_norm = .5 / numpy.sqrt(numpy.pi)
        norm = half_sph_norm / mole._gaussian_int(2, mydf.eta)
        chg_env = [mydf.eta, norm]
        ptr_eta = cell._env.size
        ptr_norm = ptr_eta + 1
        chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0]
                   for ia in range(cell.natm)]
        nuccell._atm = cell._atm
        nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32)
        nuccell._env = numpy.hstack((cell._env, chg_env))

        wj = lib.asarray(
            mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvp1_sph'))
        t1 = log.timer_debug1('pnucp pass1: analytic int', *t1)

        aoaux = ft_ao.ft_ao(nuccell, Gv)
        vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    for aoaoks, p0, p1 in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst,
                                       max_memory=max_memory, aosym='s2',
                                       intor='GTO_ft_pdotp_sph'):
        for k, aoao in enumerate(aoaoks):
            if aft_jk.gamma_point(kpts_lst[k]):
                # Only the real part is kept at the gamma point.
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real)
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag)
            else:
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao)
    t1 = log.timer_debug1('contracting pnucp', *t1)

    if mydf.eta != 0 and cell.dimension == 3:
        nucbar = sum(-z / nuccell.bas_exp(i)[0]
                     for i, z in enumerate(charge))
        nucbar *= numpy.pi / cell.vol * 2  # 2 due to the factor 1/2 in T
        ovlp = cell.pbc_intor('int1e_kin_sph', 1, lib.HERMITIAN, kpts_lst)
        for k in range(nkpts):
            s = lib.pack_tril(ovlp[k])
            wj[k] += nucbar * s

    wj_kpts = []
    for k, kpt in enumerate(kpts_lst):
        if aft_jk.gamma_point(kpt):
            wj_kpts.append(lib.unpack_tril(wj[k].real.copy()))
        else:
            wj_kpts.append(lib.unpack_tril(wj[k]))

    if kpts is None or numpy.shape(kpts) == (3, ):
        wj_kpts = wj_kpts[0]
    return numpy.asarray(wj_kpts)
def get_nuc(mydf, kpts=None):
    '''Build the nuclear-attraction matrix for each requested k-point.

    With ``mydf.eta == 0`` the reciprocal-space potential is taken from
    ``pseudo.pp_int.get_gth_vlocG_part1``; otherwise an analytic part from
    ``mydf._int_nuc_vloc`` is combined with a reciprocal-space part built
    from Gaussian model charges of exponent ``mydf.eta``.  Real and
    imaginary parts are accumulated separately.

    Args:
        mydf: density-fitting object providing ``cell``, ``gs``, ``eta``,
            ``max_memory``, ``ft_loop`` and ``_int_nuc_vloc``.
        kpts: ``None`` (a single zero k-point is used), one k-point of
            shape ``(3,)``, or an array reshapeable to ``(-1, 3)``.

    Returns:
        list of unpacked square matrices, one per k-point (real at the
        gamma point, complex otherwise); a single matrix when ``kpts`` is
        ``None`` or has shape ``(3,)``.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1,3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1,3))

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use the logger's clock pair
    # so the values match what log.timer_debug1 subtracts from.
    t1 = (logger.process_clock(), logger.perf_counter())

    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2

    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs)
    kpt_allow = numpy.zeros(3)
    if mydf.eta == 0:
        vpplocG = pseudo.pp_int.get_gth_vlocG_part1(cell, Gv)
        vpplocG = -numpy.einsum('ij,ij->j', cell.get_SI(Gv), vpplocG)
        vpplocG *= kws
        vGR = vpplocG.real
        vGI = vpplocG.imag
        vjR = numpy.zeros((nkpts,nao_pair))
        vjI = numpy.zeros((nkpts,nao_pair))
    else:
        # Auxiliary cell with one s-type Gaussian (exponent eta) per atom.
        nuccell = copy.copy(cell)
        half_sph_norm = .5/numpy.sqrt(numpy.pi)
        norm = half_sph_norm/gto.mole._gaussian_int(2, mydf.eta)
        chg_env = [mydf.eta, norm]
        ptr_eta = cell._env.size
        ptr_norm = ptr_eta + 1
        chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0]
                   for ia in range(cell.natm)]
        nuccell._atm = cell._atm
        nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32)
        nuccell._env = numpy.hstack((cell._env, chg_env))

        # PP-loc part1 is handled by fakenuc in _int_nuc_vloc
        vj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst))
        vjR = vj.real
        vjI = vj.imag
        t1 = log.timer_debug1('vnuc pass1: analytic int', *t1)

        charge = -cell.atom_charges()
        coulG = tools.get_coulG(cell, kpt_allow, gs=mydf.gs, Gv=Gv)
        coulG *= kws
        aoaux = ft_ao.ft_ao(nuccell, Gv)
        vGR = numpy.einsum('i,xi->x', charge, aoaux.real) * coulG
        vGI = numpy.einsum('i,xi->x', charge, aoaux.imag) * coulG

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst,
                            max_memory=max_memory, aosym='s2'):
        # rho_ij(G) nuc(-G) / G^2
        # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2
        if not gamma_point(kpts_lst[k]):
            vjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI)
            vjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR)
        vjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR)
        vjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI)
    t1 = log.timer_debug1('contracting Vnuc', *t1)

    # nuccell and charge exist only on the eta != 0 path, which is the
    # only path that reaches this branch.
    if mydf.eta != 0 and cell.dimension == 3:
        nucbar = sum([z/nuccell.bas_exp(i)[0] for i,z in enumerate(charge)])
        nucbar *= numpy.pi/cell.vol
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', 1, lib.HERMITIAN, kpts_lst)
        for k in range(nkpts):
            s = lib.pack_tril(ovlp[k])
            vjR[k] -= nucbar * s.real
            vjI[k] -= nucbar * s.imag

    vj = []
    for k, kpt in enumerate(kpts_lst):
        if gamma_point(kpt):
            vj.append(lib.unpack_tril(vjR[k]))
        else:
            vj.append(lib.unpack_tril(vjR[k]+vjI[k]*1j))

    if kpts is None or numpy.shape(kpts) == (3,):
        vj = vj[0]
    return vj
def solve_df_rdm2 (mc_or_mc_grad, mo_cas=None, ci=None, casdm2=None):
    ''' Solve (P|Q)d_Qij = (P|kl)d_ijkl for d_Qij in the MO basis.

    Args:
        mc_or_mc_grad: DF-MCSCF energy or gradients method object.

    Kwargs:
        mo_cas: ndarray, tuple, or list containing active mo coefficients.
            if two ndarrays mo_cas = (mo0, mo1) are provided, mo0 and mo1 are
            assumed to correspond to casdm2's LAST two dimensions in that order,
            regardless of len (ci) or len (casdm2).
            (This will facilitate SA-CASSCF gradients at some point. Note
            the difference from grad_elec_dferi!)
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if casdm2 is provided.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Taken from mc_or_mc_grad.fcisolver.make_rdm12 (ci,...) if omitted.

    Returns:
        dfcasdm2: list of ndarrays containing the 3-center 2RDM, d_Pqr, where
            P is the auxbasis index and q, r are the first two indices of the
            corresponding casdm2; one entry per density matrix.

    Raises:
        RuntimeError: if the MO-basis 3-center integrals are neither 2- nor
            3-dimensional.
    '''
    # Initialize mol and auxmol
    mol = mc_or_mc_grad.mol
    if isinstance (mc_or_mc_grad, GradientsBasics):
        mc = mc_or_mc_grad.base
    else:
        mc = mc_or_mc_grad
    auxmol = mc.with_df.auxmol
    if auxmol is None:
        auxmol = df.addons.make_auxmol(mc.with_df.mol, mc.with_df.auxbasis)
    naux = auxmol.nao
    ncore, ncas, nelecas = mc.ncore, mc.ncas, mc.nelecas
    nocc = ncore + ncas

    # Initialize casdm2 and mo_cas
    if mo_cas is None: mo_cas = mc.mo_coeff[:,ncore:nocc]
    if ci is None: ci = mc.ci
    if casdm2 is None:
        # make_rdm12 returns the pair (rdm1, rdm2); only the 2-RDM is wanted.
        casdm2 = mc.fcisolver.make_rdm12 (ci, ncas, nelecas)[1]
    if np.asarray (casdm2).ndim == 4: casdm2 = [casdm2]

    # (P|Q) and (P|ij)
    int2c = linalg.cho_factor(auxmol.intor('int2c2e', aosym='s1'))
    int3c = get_int3c_mo (mol, auxmol, mo_cas, compact=True,
                          max_memory=mc_or_mc_grad.max_memory)

    # Decide ONCE whether the integrals came back in packed (i>=j) form.
    # Doing this inside the loop would misclassify every density matrix
    # after the first, because the square form is flattened to 2-D below.
    if int3c.ndim == 2:
        symm = True
    elif int3c.ndim == 3:
        symm = False
        int3c = int3c.reshape (naux, -1)
    else:
        raise RuntimeError ('int3c.shape = {}'.format (int3c.shape))

    # Solve (P|Q) d_Qij = (P|kl) d_ijkl
    dfcasdm2 = []
    for dm2 in casdm2:
        nmo = tuple (dm2.shape)
        if symm:
            # I'm not going to use the memory-efficient version because
            # this is meant to be small
            nmo_pair = nmo[2] * (nmo[2] + 1) // 2
            # Work on a copy so the caller's 2-RDM is left untouched.
            dm2 = dm2.copy ().reshape ((-1, nmo[2], nmo[3]))
            dm2 += dm2.transpose (0,2,1)
            diag_idx = np.arange(nmo[-1])
            diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
            dm2 = lib.pack_tril (np.ascontiguousarray (dm2))
            # Diagonal pairs were doubled by the symmetrization above.
            dm2[:,diag_idx] *= 0.5
        else:
            nmo_pair = nmo[2] * nmo[3]
        dm2 = dm2.reshape (nmo[0]*nmo[1], nmo_pair).T
        int3c_dm2 = np.dot (int3c, dm2)
        dfcasdm2.append (linalg.cho_solve (int2c, int3c_dm2).reshape (
            naux, nmo[0], nmo[1]))
    return dfcasdm2
def grad_elec_auxresponse_dferi (mc_grad, mo_cas=None, ci=None, dfcasdm2=None,
                                 casdm2=None, atmlst=None, max_memory=None,
                                 dferi=None, incl_2c=True):
    ''' Evaluate the [(P'|ij) + (P'|Q) g_Qij] d_Pij contribution to the
    electronic gradient, where d_Pij is the DF-2RDM obtained by solve_df_rdm2
    and g_Qij solves (P|Q) g_Qij = (P|ij). The caller must symmetrize if
    necessary (i.e., (P|Q) d_Qij = (P|kl) d_ijkl <-> (P|Q) d_Qkl = (P|ij) d_ijkl
    in order to get at Q').

    Args:
        mc_grad: MC-SCF gradients method object

    Kwargs:
        mo_cas: ndarray, list, or tuple containing active-space MO coefficients
            If a tuple of length 2, the same pair of MO sets are assumed to
            apply to the internally-contracted and externally-contracted
            indices of the DF-2rdm:
            (P|Q)d_Qij = (P|kl)d_ijkl -> (P|Q)d_Qij = (P|ij)d_ijij
            If a tuple of length 4, the 4 MO sets are applied to ijkl above in
            that order (first two external, last two internal).
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if dfcasdm2 is provided.
        dfcasdm2: ndarray, tuple, or list containing DF-2rdm in mo_cas basis.
            Computed by solve_df_rdm2 if omitted.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Passed on to solve_df_rdm2 when dfcasdm2 is omitted.
        atmlst: list of integers
            List of nonfrozen atoms, as in grad_elec functions.
            Defaults to list (range (mol.natm)).
            NOTE(review): the value is not used below; the result always
            covers every atom of the auxiliary basis.
        max_memory: int
            Maximum memory usage in MB
        dferi: ndarray containing g_Pij for optional precalculation
        incl_2c: bool
            If False, omit the terms depending on (P'|Q)

    Returns:
        dE: ndarray of shape (nset, natm, 3), one slab per DF-2rdm

    Raises:
        RuntimeError: if mo_cas is not a 2-D ndarray or a sequence of
            2 or 4 MO sets.
    '''
    if isinstance (mc_grad, GradientsBasics):
        mc = mc_grad.base
    else:
        mc = mc_grad
    mol = mc_grad.mol
    auxmol = mc.with_df.auxmol
    if auxmol is None:
        # Same fallback as solve_df_rdm2: build the auxiliary molecule on
        # demand instead of failing on auxmol.nao below.
        auxmol = df.addons.make_auxmol(mc.with_df.mol, mc.with_df.auxbasis)
    ncore, ncas, nao, naux, nbas = (mc.ncore, mc.ncas, mol.nao, auxmol.nao,
                                    mol.nbas)
    nocc = ncore + ncas
    npair = nao * (nao + 1) // 2
    if mo_cas is None: mo_cas = mc.mo_coeff[:,ncore:nocc]
    if max_memory is None: max_memory = mc.max_memory
    if isinstance (mo_cas, np.ndarray) and mo_cas.ndim == 2:
        mo_cas = (mo_cas,)*4
    elif len (mo_cas) == 2:
        mo_cas = (mo_cas[0], mo_cas[1], mo_cas[0], mo_cas[1])
    elif len (mo_cas) == 4:
        mo_cas = tuple (mo_cas)
    else:
        # A tuple/list has no .shape, so report the length instead; the
        # former message raised AttributeError before it could be shown.
        raise RuntimeError (
            'Invalid number of MO sets in mo_cas: {} (expected 1, 2, or 4)'
            .format (len (mo_cas)))
    nmo = [mo.shape[1] for mo in mo_cas]
    if atmlst is None: atmlst = list (range (mol.natm))
    if ci is None: ci = mc.ci
    if dfcasdm2 is None:
        # d_Pij = (P|Q)^{-1} (Q|kl) d_ijkl
        dfcasdm2 = solve_df_rdm2 (mc, mo_cas=mo_cas[2:], ci=ci, casdm2=casdm2)
    nset = len (dfcasdm2)
    dE = np.zeros ((nset, naux, 3))
    # np.array copies, so the in-place symmetrization below does not touch
    # the caller's arrays.
    dfcasdm2 = np.array (dfcasdm2)

    # Shape dfcasdm2
    mosym, nmo_pair, mo_conc, mo_slice = _conc_mos(mo_cas[0], mo_cas[1],
                                                   compact=True)
    if 's2' in mosym:
        assert (nmo[0] == nmo[1]), 'How did I get {} with nmo[0] = {} and nmo[1] = {}'.format (mosym, nmo[0], nmo[1])
        dfcasdm2 = dfcasdm2.reshape (nset*naux, nmo[0], nmo[1])
        dfcasdm2 += dfcasdm2.transpose (0,2,1)
        diag_idx = np.arange(nmo[0])
        diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
        dfcasdm2 = lib.pack_tril (np.ascontiguousarray (dfcasdm2))
        # Diagonal pairs were doubled by the symmetrization above.
        dfcasdm2[:,diag_idx] *= 0.5
    dfcasdm2 = dfcasdm2.reshape (nset, naux, nmo_pair)

    # Do 2c part. Assume memory is no object
    if incl_2c:
        int2c = auxmol.intor('int2c2e_ip1')
        if (dferi is None):
            # g_Pij = (P|Q)^{-1} (Q|ij)
            dferi = solve_df_eri (mc, mo_cas=mo_cas[:2]).reshape (
                naux, nmo_pair)
        int3c = np.dot (int2c, dferi) # (P'|Q) g_Qij
        dE += lib.einsum ('npi,xpi->npx', dfcasdm2, int3c) # d_Pij (P'|Q) g_Qij
        int2c = int3c = dferi = None

    # Set up 3c part
    get_int3c = _int3c_wrapper(mol, auxmol, 'int3c2e_ip2', 's2ij')
    max_memory -= lib.current_memory()[0]
    blklen = 6*npair
    blksize = int (min (max (max_memory * 1e6 / 8 / blklen, 20), 240))
    aux_loc = auxmol.ao_loc
    aux_ranges = balance_partition(aux_loc, blksize)

    # Iterate over auxbasis range and do 3c part
    for shl0, shl1, nL in aux_ranges:
        p0, p1 = aux_loc[shl0], aux_loc[shl1]
        # (uv|P'); shape = (3,npair,p1-p0)
        int3c = get_int3c ((0, nbas, 0, nbas, shl0, shl1))
        int3c = np.ascontiguousarray (
            int3c.transpose (0,2,1).reshape (3*(p1-p0), npair))
        int3c = _ao2mo.nr_e2(int3c, mo_conc, mo_slice, aosym='s2', mosym=mosym)
        int3c = int3c.reshape (3,p1-p0,nmo_pair)
        int3c = np.ascontiguousarray (int3c)
        dE[:,p0:p1,:] -= lib.einsum ('npi,xpi->npx', dfcasdm2[:,p0:p1,:], int3c)

    # Ravel to atoms
    auxslices = auxmol.aoslice_by_atom ()
    dE = np.array ([dE[:,p0:p1].sum (axis=1)
                    for p0, p1 in auxslices[:,2:]]).transpose (1,0,2)
    return np.ascontiguousarray (dE)
def energy_elec_dferi (mc, mo_cas=None, ci=None, dfcasdm2=None, casdm2=None):
    ''' Evaluate E2 = (P|ij) d_Pij / 2, where d_Pij is the DF-2rdm obtained by
    solve_df_rdm2. For testing purposes. Note that the only index permutation
    this function understands is (P|ij) = (P|ji) if i and j span the same
    range of MOs. The caller has to handle everything else, including, for
    instance, multiplication by 2 if a nonsymmetric slice of the 2RDM is used.

    Args:
        mc: MC-SCF energy method object

    Kwargs:
        mo_cas: ndarray, list, or tuple containing active-space MO coefficients
            If a tuple of length 2, the same pair of MO sets are assumed to
            apply to the internally-contracted and externally-contracted
            indices of the DF-2rdm:
            (P|Q)d_Qij = (P|kl)d_ijkl -> (P|Q)d_Qij = (P|ij)d_ijij
            If a tuple of length 4, the 4 MO sets are applied to ijkl above in
            that order (first two external, last two internal).
        ci: ndarray, tuple, or list containing CI coefficients in mo_cas basis.
            Not used if dfcasdm2 is provided.
        dfcasdm2: ndarray, tuple, or list containing DF-2rdm in mo_cas basis.
            Computed by solve_df_rdm2 if omitted. Not modified.
        casdm2: ndarray, tuple, or list containing rdm2 in mo_cas basis.
            Passed on to solve_df_rdm2 when dfcasdm2 is omitted.

    Returns:
        energy: list
            List of energies corresponding to the dfcasdm2s,
            E = (P|ij) d_Pij / 2 = (P|ij) (P|Q)^-1 (Q|kl) d_ijkl / 2

    Raises:
        RuntimeError: if mo_cas is not a 2-D ndarray or a sequence of
            2 or 4 MO sets.
    '''
    if isinstance (mc, GradientsBasics): mc = mc.base
    if mo_cas is None:
        ncore = mc.ncore
        nocc = ncore + mc.ncas
        mo_cas = mc.mo_coeff[:,ncore:nocc]
    if isinstance (mo_cas, np.ndarray) and mo_cas.ndim == 2:
        mo_cas = (mo_cas,)*4
    elif len (mo_cas) == 2:
        mo_cas = (mo_cas[0], mo_cas[1], mo_cas[0], mo_cas[1])
    elif len (mo_cas) == 4:
        mo_cas = tuple (mo_cas)
    else:
        # A tuple/list has no .shape, so report the length instead; the
        # former message raised AttributeError before it could be shown.
        raise RuntimeError (
            'Invalid number of MO sets in mo_cas: {} (expected 1, 2, or 4)'
            .format (len (mo_cas)))
    nmo = [mo.shape[1] for mo in mo_cas]
    if ci is None: ci = mc.ci
    if dfcasdm2 is None:
        dfcasdm2 = solve_df_rdm2 (mc, mo_cas=mo_cas[2:], ci=ci, casdm2=casdm2)
    int3c = get_int3c_mo (mc.mol, mc.with_df.auxmol, mo_cas[:2], compact=True,
                          max_memory=mc.max_memory)
    # A 2-D result means the integrals are stored in packed (i>=j) form.
    symm = (int3c.ndim == 2)
    int3c = np.ravel (int3c)
    energy = []
    for dm2 in dfcasdm2:
        if symm:
            # Build the symmetrized array out of place: an in-place `+=`
            # here would silently overwrite the caller's dfcasdm2.
            dm2 = dm2 + dm2.transpose (0,2,1)
            diag_idx = np.arange(nmo[1])
            diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
            dm2 = lib.pack_tril (np.ascontiguousarray (dm2))
            # Diagonal pairs were doubled by the symmetrization above.
            dm2[:,diag_idx] *= 0.5
        energy.append (np.dot (int3c, dm2.ravel ()) / 2)
    return energy
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Finish the ``j3c`` datasets of every k-point pair mapped to one
    unique k-point difference.

    For each pair index selected by ``uniq_inverse == uniq_kptji_id`` the
    stored ``feri['j3c/<idx>']`` columns are read, corrected with
    Fourier-space contributions accumulated in blocks of G-vectors,
    multiplied by the decomposed ``feri['j2c/<id>']`` metric, and written
    back.  The ``j2c`` dataset is deleted at the end and each ``j3c``
    dataset is replaced by its first ``naux0`` rows.

    Relies on names from the enclosing scope (``feri``, ``uniq_kpts``,
    ``uniq_inverse``, ``kptjs``, ``cell``, ``auxcell``, ``fused_cell``,
    ``fuse``, ``Gv``, ``Gvbase``, ``gxyz``, ``b``, ``gs``, ``ngs``,
    ``nao``, ``naux``, ``mydf``, ``log``).
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # Fourier transform of the shells of fused_cell that follow auxcell's,
    # weighted by the Coulomb kernel and split into real/imaginary parts.
    shls_slice = (auxcell.nbas, fused_cell.nbas)
    Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
    Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
    kLR = Gaux.real.copy('C')
    kLI = Gaux.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
    # Prefer a Cholesky factor of the metric; if that fails, fall back to
    # an eigen-decomposition that drops eigenvalues at or below
    # mydf.linear_dep_threshold.
    try:
        j2c = scipy.linalg.cholesky(j2c, lower=True)
        j2ctag = 'CD'
    except scipy.linalg.LinAlgError as e:
        #msg =('===================================\n'
        #      'J-metric not positive definite.\n'
        #      'It is likely that gs is not enough.\n'
        #      '===================================')
        #log.error(msg)
        #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
        w, v = scipy.linalg.eigh(j2c)
        log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
        log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                  numpy.count_nonzero(w < mydf.linear_dep_threshold))
        v = v[:, w > mydf.linear_dep_threshold].T.conj()
        v /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
        j2c = v
        j2ctag = 'eig'
    # Number of rows kept after the decomposition.
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(
        max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
        nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(
            16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(
            16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs, 16384)
    # Scratch buffers reused for every (shell range, G block) pair.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        # Load the stored columns for this shell range, one entry per
        # adapted k-point pair.
        for k, idx in enumerate(adapted_ji_idx):
            v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            # No imaginary part is kept when both kpt and kptj are zero.
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
        v = None

        shls_slice = (bstart, bend, 0, bend)
        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # Accumulate into rows naux: of the stored arrays
                # (last two lib.dot arguments are output and its weight).
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        # Apply the decomposed metric and write the columns back.
        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d' % ji][:naux0, col0:col1] = v

    del (feri['j2c/%d' % uniq_kptji_id])
    # Re-create each j3c dataset holding only its first naux0 rows.
    for k, ji in enumerate(adapted_ji_idx):
        v = feri['j3c/%d' % ji][:naux0]
        del (feri['j3c/%d' % ji])
        feri['j3c/%d' % ji] = v
def test_pack_tril_integer(self):
    '''pack_tril of an int32 3x3 matrix gives its lower triangle, row by
    row, and keeps the int32 dtype.'''
    square = numpy.arange(9, dtype=numpy.int32).reshape(3,3)
    packed = lib.pack_tril(square)
    expected = numpy.array((0,3,4,6,7,8))
    self.assertTrue(numpy.array_equal(packed, expected))
    self.assertTrue(packed.dtype == numpy.int32)
def get_jk(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):
    '''Build J and/or K matrices from density-fitted integrals.

    Args:
        dfobj: density-fitting object providing ``loop``, ``blockdim``,
            ``max_memory``, ``mol``, ``_cderi``, ``stdout`` and ``verbose``.
        dm: density matrix or stack of density matrices; the last axis
            length is ``nao``.  If it carries ``mo_coeff``/``mo_occ``
            attributes, K is built from the occupied orbitals.
        hermi: forwarded to ``get_j`` on the J-only shortcut; not read
            otherwise.
        with_j, with_k: whether to build J / K (at least one must be true).
        direct_scf_tol: forwarded to ``get_j`` on the J-only shortcut.

    Returns:
        ``(vj, vk)``.  On the J-only shortcut (``_cderi`` not initialised and
        ``mol.incore_anyway`` false) ``vk`` is ``None``; otherwise ``vj`` is
        ``0`` when ``with_j`` is false and ``vk`` is a zero array when
        ``with_k`` is false.
    '''
    assert (with_j or with_k)
    if (not with_k and not dfobj.mol.incore_anyway and
        # 3-center integral tensor is not initialized
        dfobj._cderi is None):
        return get_j(dfobj, dm, hermi, direct_scf_tol), None

    # time.clock() was removed in Python 3.8; use the logger's clock pair
    # so the values match what the logger timers subtract from.
    t0 = t1 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(dfobj.stdout, dfobj.verbose)
    fmmm = _ao2mo.libao2mo.AO2MOmmm_bra_nr_s2
    fdrv = _ao2mo.libao2mo.AO2MOnr_e2_drv
    ftrans = _ao2mo.libao2mo.AO2MOtranse2_nr_s2
    null = lib.c_null_ptr()

    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1, nao, nao)
    nset = dms.shape[0]
    vj = 0
    vk = numpy.zeros_like(dms)

    if with_j:
        # Packed lower triangle of dm + dm^H with the diagonal halved.
        idx = numpy.arange(nao)
        dmtril = lib.pack_tril(dms + dms.conj().transpose(0, 2, 1))
        dmtril[:, idx * (idx + 1) // 2 + idx] *= .5

    if not with_k:
        for eri1 in dfobj.loop():
            rho = numpy.einsum('ix,px->ip', dmtril, eri1)
            vj += numpy.einsum('ip,px->ix', rho, eri1)

    elif getattr(dm, 'mo_coeff', None) is not None:
        #TODO: test whether dm.mo_coeff matching dm
        mo_coeff = numpy.asarray(dm.mo_coeff, order='F')
        mo_occ = numpy.asarray(dm.mo_occ)
        nmo = mo_occ.shape[-1]
        mo_coeff = mo_coeff.reshape(-1, nao, nmo)
        mo_occ = mo_occ.reshape(-1, nmo)
        if mo_occ.shape[0] * 2 == nset:  # handle ROHF DM
            mo_coeff = numpy.vstack((mo_coeff, mo_coeff))
            mo_occa = numpy.array(mo_occ > 0, dtype=numpy.double)
            mo_occb = numpy.array(mo_occ == 2, dtype=numpy.double)
            assert (mo_occa.sum() + mo_occb.sum() == mo_occ.sum())
            mo_occ = numpy.vstack((mo_occa, mo_occb))

        # Occupied orbitals scaled by sqrt(occupation), one set per dm.
        orbo = []
        for k in range(nset):
            c = numpy.einsum('pi,i->pi', mo_coeff[k][:, mo_occ[k] > 0],
                             numpy.sqrt(mo_occ[k][mo_occ[k] > 0]))
            orbo.append(numpy.asarray(c, order='F'))

        max_memory = dfobj.max_memory - lib.current_memory()[0]
        blksize = max(4, int(min(dfobj.blockdim,
                                 max_memory * .3e6 / 8 / nao**2)))
        buf = numpy.empty((blksize * nao, nao))
        for eri1 in dfobj.loop(blksize):
            naux, nao_pair = eri1.shape
            assert (nao_pair == nao * (nao + 1) // 2)
            if with_j:
                rho = numpy.einsum('ix,px->ip', dmtril, eri1)
                vj += numpy.einsum('ip,px->ix', rho, eri1)

            for k in range(nset):
                nocc = orbo[k].shape[1]
                if nocc > 0:
                    buf1 = buf[:naux * nocc]
                    fdrv(ftrans, fmmm,
                         buf1.ctypes.data_as(ctypes.c_void_p),
                         eri1.ctypes.data_as(ctypes.c_void_p),
                         orbo[k].ctypes.data_as(ctypes.c_void_p),
                         ctypes.c_int(naux), ctypes.c_int(nao),
                         (ctypes.c_int * 4)(0, nocc, 0, nao),
                         null, ctypes.c_int(0))
                    vk[k] += lib.dot(buf1.T, buf1)
            t1 = log.timer_debug1('jk', *t1)
    else:
        #:vk = numpy.einsum('pij,jk->pki', cderi, dm)
        #:vk = numpy.einsum('pki,pkj->ij', cderi, vk)
        rargs = (ctypes.c_int(nao), (ctypes.c_int * 4)(0, nao, 0, nao),
                 null, ctypes.c_int(0))
        dms = [numpy.asarray(x, order='F') for x in dms]
        max_memory = dfobj.max_memory - lib.current_memory()[0]
        blksize = max(4, int(min(dfobj.blockdim,
                                 max_memory * .22e6 / 8 / nao**2)))
        buf = numpy.empty((2, blksize, nao, nao))
        for eri1 in dfobj.loop(blksize):
            naux, nao_pair = eri1.shape
            if with_j:
                rho = numpy.einsum('ix,px->ip', dmtril, eri1)
                vj += numpy.einsum('ip,px->ix', rho, eri1)

            for k in range(nset):
                buf1 = buf[0, :naux]
                fdrv(ftrans, fmmm,
                     buf1.ctypes.data_as(ctypes.c_void_p),
                     eri1.ctypes.data_as(ctypes.c_void_p),
                     dms[k].ctypes.data_as(ctypes.c_void_p),
                     ctypes.c_int(naux), *rargs)

                buf2 = lib.unpack_tril(eri1, out=buf[1])
                vk[k] += lib.dot(buf1.reshape(-1, nao).T,
                                 buf2.reshape(-1, nao))
            t1 = log.timer_debug1('jk', *t1)

    if with_j:
        vj = lib.unpack_tril(vj, 1).reshape(dm_shape)
    if with_k:
        vk = vk.reshape(dm_shape)
    logger.timer(dfobj, 'df vj and vk', *t0)
    return vj, vk
def Lci_dot_dgci_dx(Lci, weights, mc, mo_coeff=None, ci=None, atmlst=None,
                    mf_grad=None, eris=None, verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the CI
    Lagrange term nuclear gradient (sum_IJ Lci_IJ d2_Ecas/d_lambda d_PIJ)
    This involves removing all core-core and nuclear-nuclear terms and making
    the substitution
    sum_I w_I<L_I|p'q|I> + c.c. -> <0|p'q|0>
    sum_I w_I<L_I|p'r'sq|I> + c.c. -> <0|p'r'sq|0>
    The active-core terms (sum_I w_I<L_I|x'iyi|I>, sum_I w_I <L_I|x'iiy|I>,
    c.c.) must be retained.

    Args:
        Lci: bra vector(s) passed to mc.fcisolver.trans_rdm12 together
            with ci.
        weights: not read in this body.
        mc: MC-SCF object; mc.frozen must be None.
        mo_coeff, ci: default to mc.mo_coeff / mc.ci.
        atmlst: atoms to evaluate; defaults to range(mol.natm).
        mf_grad: defaults to mc._scf.nuc_grad_method().
        eris: object whose ``ppaa`` attribute is indexed below.
            NOTE(review): no default is built when this is None, so the
            caller apparently has to supply it -- confirm.
        verbose: not read in this body.

    Returns:
        de: ndarray of shape (len(atmlst), 3), the sum of the hcore,
        renormalization and ERI components.

    Raises:
        NotImplementedError: if mc.frozen is not None.
    '''
    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    t0 = (logger.process_clock(), logger.perf_counter())
    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: TDMs + c.c. instead of RDMs; 06/30/2020: new interface in mcscf.addons makes this much more transparent
    casdm1, casdm2 = mc.fcisolver.trans_rdm12(Lci, ci, ncas, nelecas)
    casdm1 += casdm1.transpose(1, 0)
    casdm2 += casdm2.transpose(1, 0, 3, 2)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # Gather the active-row slices of eris.ppaa into one array with the
    # general MO index third.
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        aapa[:, :, i, :] = eris.ppaa[i][ncore:nocc, :, :].transpose(1, 2, 0)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # MRH: delete h1 + vhf_c from the first line below (core and core-core stuff)
    # Also extend gfock to span the whole space
    # NOTE(review): gfock takes dm_cas's AO shape but is indexed with MO
    # columns, which appears to assume nao == nmo -- confirm.
    gfock = np.zeros_like(dm_cas)
    gfock[:, :nocc] = reduce(np.dot, (mo_coeff.T, vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(np.dot,
                                  (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += np.einsum('uvpw,vuwt->pt', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    # Drop references to the large intermediates before the next stage.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas))
    vhf1c, vhf1a = vj - vk * 0.5
    #vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Transform the symmetrized 2-body density to the AO basis and pack
    # its AO pair index, halving the diagonal pairs.
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2),
                                mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))
    de = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays of this size are required!
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lci_dot_dgci memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lci_dot_dgci 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: dm1 -> dm_cas in the line below
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm_cas)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        # q0:q1 tracks the AO range covered by the current shell block.
        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            gc.collect()
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lci_dot_dgci atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: dm1 -> dm_cas in the line below. Also eliminate core-core terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm_cas[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                               dm_core[p0:p1]) * 2

    logger.debug(mc, "CI lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "CI lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "CI lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri

    return de
def _add_vvvv_tril(mycc, t1T, t2T, eris, out=None, with_ovvv=None):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)
    Using symmetry t2[ijab] = t2[jiba] and Ht2[ijab] = Ht2[jiba], compute the
    lower triangular part of Ht2

    Args:
        mycc: CC object; only the AO-direct path (``mycc.direct``) is
            implemented.
        t1T: singles amplitudes with the virtual index first, or None to
            use ``t2T`` alone as tau.
        t2T: this rank's segment of the doubles amplitudes; its first three
            axes are (nvir_seg, nvir, nocc).
        eris: integrals object; ``eris.mo_coeff`` is used when present,
            otherwise ``_mo_without_core(mycc, mycc.mo_coeff)``.
        out: optional buffer backing the returned array.
        with_ovvv: whether to add the t1-dependent terms; defaults to
            ``mycc.direct``.

    Returns:
        Ht2tril: ndarray of shape (nvir_seg, nvir, nocc*(nocc+1)//2).

    Raises:
        NotImplementedError: if ``mycc.direct`` is false.
    '''
    # time.clock() was removed in Python 3.8; use the logger's clock pair
    # so the values match what log.timer_debug1 subtracts from.
    time0 = logger.process_clock(), logger.perf_counter()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    if with_ovvv is None:
        with_ovvv = mycc.direct
    nvir_seg, nvir, nocc = t2T.shape[:3]
    vloc0, vloc1 = _task_location(nvir, rank)
    nocc2 = nocc * (nocc + 1) // 2
    if t1T is None:
        tau = lib.pack_tril(t2T.reshape(nvir_seg * nvir, nocc, nocc))
    else:
        tau = t2T + numpy.einsum('ai,bj->abij', t1T[vloc0:vloc1], t1T)
        tau = lib.pack_tril(tau.reshape(nvir_seg * nvir, nocc, nocc))
    tau = tau.reshape(nvir_seg, nvir, nocc2)

    if mycc.direct:  # AO-direct CCSD
        mo = getattr(eris, 'mo_coeff', None)
        if mo is None:  # If eris does not have the attribute mo_coeff
            mo = _mo_without_core(mycc, mycc.mo_coeff)

        ao_loc = mycc.mol.ao_loc_nr()
        orbv = mo[:, nocc:]
        nao, nvir = orbv.shape

        ntasks = mpi.pool.size
        task_sh_locs = lib.misc._balanced_partition(ao_loc, ntasks)
        ao_loc0 = ao_loc[task_sh_locs[rank]]
        ao_loc1 = ao_loc[task_sh_locs[rank + 1]]

        # Transform tau to the AO basis: the second index locally, the
        # first index while the blocks rotate through the ranks.
        tau = lib.einsum('pb,abx->apx', orbv, tau)
        tau_priv = numpy.zeros((ao_loc1 - ao_loc0, nao, nocc2))
        for task_id, tau in _rotate_tensor_block(tau):
            loc0, loc1 = _task_location(nvir, task_id)
            tau_priv += lib.einsum('pa,abx->pbx',
                                   orbv[ao_loc0:ao_loc1, loc0:loc1], tau)
        tau = None
        time1 = log.timer_debug1('vvvv-tau mo2ao', *time0)

        buf = _contract_vvvv_t2(mycc, None, tau_priv, task_sh_locs, None, log)
        # buf_ao keeps the AO-basis result for the with_ovvv terms below.
        buf = buf_ao = buf.reshape(tau_priv.shape)
        tau_priv = None
        time1 = log.timer_debug1('vvvv-tau contraction', *time1)

        # Back-transform to the virtual MO basis.
        buf = lib.einsum('apx,pb->abx', buf, orbv)
        Ht2tril = numpy.ndarray((nvir_seg, nvir, nocc2), buffer=out)
        Ht2tril[:] = 0
        for task_id, buf in _rotate_tensor_block(buf):
            ao_loc0 = ao_loc[task_sh_locs[task_id]]
            ao_loc1 = ao_loc[task_sh_locs[task_id + 1]]
            Ht2tril += lib.einsum('pa,pbx->abx',
                                  orbv[ao_loc0:ao_loc1, vloc0:vloc1], buf)

        time1 = log.timer_debug1('vvvv-tau ao2mo', *time1)

        if with_ovvv:
            #: tmp = numpy.einsum('ijcd,ak,kdcb->ijba', tau, t1T, eris.ovvv)
            #: t2new -= tmp + tmp.transpose(1,0,3,2)
            orbo = mo[:, :nocc]
            buf = lib.einsum('apx,pi->axi', buf_ao, orbo)
            tmp = numpy.zeros((nvir_seg, nocc2, nocc))
            for task_id, buf in _rotate_tensor_block(buf):
                ao_loc0 = ao_loc[task_sh_locs[task_id]]
                ao_loc1 = ao_loc[task_sh_locs[task_id + 1]]
                tmp += lib.einsum('pa,pxi->axi',
                                  orbv[ao_loc0:ao_loc1, vloc0:vloc1], buf)
            Ht2tril -= lib.einsum('axi,bi->abx', tmp, t1T)
            tmp = buf = None

            t1_ao = numpy.dot(orbo, t1T[vloc0:vloc1].T)
            buf = lib.einsum('apx,pb->abx', buf_ao, orbv)
            for task_id, buf in _rotate_tensor_block(buf):
                ao_loc0 = ao_loc[task_sh_locs[task_id]]
                ao_loc1 = ao_loc[task_sh_locs[task_id + 1]]
                Ht2tril -= lib.einsum('pa,pbx->abx',
                                      t1_ao[ao_loc0:ao_loc1], buf)

        # Total time since entry (measured from time0, not time1).
        time1 = log.timer_debug1('contracting vvvv-tau', *time0)
    else:
        raise NotImplementedError
    return Ht2tril
def Lorb_dot_dgorb_dx(Lorb, mc, mo_coeff=None, ci=None, atmlst=None,
                      mf_grad=None, eris=None, verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the orbital
    Lagrange term nuclear gradient (sum_pq Lorb_pq d2_Ecas/d_lambda d_kpq)
    This involves removing nuclear-nuclear terms and making the substitution
    (D_[p]q + D_p[q]) -> D_pq
    (d_[p]qrs + d_pq[r]s + d_p[q]rs + d_pqr[s]) -> d_pqrs
    Where [] around an index implies contraction with Lorb from the left, so
    that the external index (regardless of whether the index on the rdm is bra
    or ket) is always the first index of Lorb.

    Args:
        Lorb : square matrix of orbital Lagrange multipliers in the MO basis.
        mc : CASSCF-like object (``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``_scf``, ``frozen``, ``max_memory`` are read).

    Kwargs:
        mo_coeff : MO coefficients; defaults to ``mc.mo_coeff``.
        ci : CI vector(s); defaults to ``mc.ci``.
        atmlst : atoms to differentiate; defaults to all atoms of ``mc.mol``.
        mf_grad : SCF gradient object; defaults to
            ``mc._scf.nuc_grad_method()``.
        eris : integral object exposing ``ppaa`` and ``papa``. When None it is
            built with ``mc.ao2mo(mo_coeff)``.
        verbose : accepted for interface compatibility; not used here.

    Returns:
        ndarray of shape (len(atmlst), 3).

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    # dmo = smoT.dao.smo
    # dao = mo.dmo.moT
    t0 = (logger.process_clock(), logger.perf_counter())

    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    if eris is None:
        # The signature advertises eris=None, but the integrals are required
        # below; build them instead of failing on ``None.ppaa``.
        eris = mc.ao2mo(mo_coeff)

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: new 'effective' MO coefficients including contraction from the Lagrange multipliers
    moL_coeff = np.dot(mo_coeff, Lorb)
    s0_inv = np.dot(mo_coeff, mo_coeff.T)
    moL_core = moL_coeff[:, :ncore]
    moL_cas = moL_coeff[:, ncore:nocc]

    # MRH: these SHOULD be state-averaged! Use the actual sacasscf object!
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    # MRH: each index exactly once!
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: new density matrix terms
    dmL_core = np.dot(moL_core, mo_core.T) * 2
    dmL_cas = reduce(np.dot, (moL_cas, casdm1, mo_cas.T))
    dmL_core += dmL_core.T
    dmL_cas += dmL_cas.T
    dm1 = dm_core + dm_cas
    dm1L = dmL_core + dmL_cas
    # MRH: end new density matrix terms
    # MRH: wrap the integral instead of the density matrix. I THINK the sign is the same!
    # mo sets 0 and 2 should be transposed, 1 and 3 should be not transposed; this will lead to correct sign
    # Except I can't do this for the external index, because the external index is contracted to ovlp matrix,
    # not the 2RDM
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    aapaL = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        jbuf = eris.ppaa[i]
        kbuf = eris.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :].transpose(1, 2, 0)
        aapaL[:, :, i, :] += np.tensordot(jbuf, Lorb[:, ncore:nocc],
                                          axes=((0), (0)))
        kbuf = np.tensordot(kbuf, Lorb[:, ncore:nocc],
                            axes=((1), (0))).transpose(1, 2, 0)
        aapaL[:, :, i, :] += kbuf + kbuf.transpose(1, 0, 2)

    # MRH: new vhf terms
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    vjL, vkL = mc._scf.get_jk(mol, (dmL_core, dmL_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    vhfL_c = vjL[0] - vkL[0] * .5
    vhfL_a = vjL[1] - vkL[1] * .5
    # MRH: I rewrote this Feff calculation completely, double-check it
    gfock = np.dot(h1, dm1L)  # h1e
    gfock += np.dot((vhf_c + vhf_a), dmL_core)  # core-core and active-core, 2nd 1RDM linked
    gfock += np.dot((vhfL_c + vhfL_a), dm_core)  # core-core and active-core, 1st 1RDM linked
    gfock += np.dot(vhfL_c, dm_cas)  # core-active, 1st 1RDM linked
    gfock += np.dot(vhf_c, dmL_cas)  # core-active, 2nd 1RDM linked
    # Definition of quantity is in MO's; going (AO->MO->AO) incurs an inverse ovlp
    gfock = np.dot(s0_inv, gfock)
    gfock += reduce(np.dot, (mo_coeff,
                             np.einsum('uviw,uvtw->it', aapaL, casdm2),
                             mo_cas.T))  # active-active
    # MRH: I have to contract this external 2RDM index explicitly on the 2RDM but fortunately I can do so here
    gfock += reduce(np.dot, (mo_coeff,
                             np.einsum('uviw,vuwt->it', aapa, casdm2),
                             moL_cas.T))
    # MRH: As of 04/18/2019, the two-body part of this is including aapaL is definitely, unambiguously correct
    dme0 = (gfock + gfock.T) / 2  # This transpose is for the overlap matrix later on
    aapa = vj = vk = vhf_c = vhf_a = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    vhf1c, vhf1a, vhf1cL, vhf1aL = vj - vk * 0.5
    #vhf1c, vhf1a, vhf1cL, vhf1aL = mf_grad.get_veff(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements inside a packed lower triangle
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2),
                                mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # MRH: contract the final two indices of the active-active 2RDM with L as you change to AOs
    # note tensordot always puts indices in the order of the arguments.
    dm2Lbuf = np.zeros((ncas**2, nmo, nmo))
    # MRH: The second line below transposes the L; the third line transposes the derivative later on
    # Both the L and the derivative have to explore all indices
    dm2Lbuf[:, :, ncore:nocc] = np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 2)).transpose(1, 2, 0, 3).reshape(ncas**2, nmo, ncas)
    dm2Lbuf[:, ncore:nocc, :] += np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 3)).transpose(1, 2, 3, 0).reshape(ncas**2, ncas, nmo)
    dm2Lbuf += dm2Lbuf.transpose(0, 2, 1)
    dm2Lbuf = np.ascontiguousarray(dm2Lbuf)
    dm2Lbuf = ao2mo._ao2mo.nr_e2(dm2Lbuf.reshape(ncas**2, nmo**2),
                                 mo_coeff.T,
                                 (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    dm2Lbuf = lib.pack_tril(dm2Lbuf)
    dm2Lbuf[:, diag_idx] *= .5
    dm2Lbuf = dm2Lbuf.reshape(ncas, ncas, nao_pair)

    if atmlst is None:
        atmlst = list(range(mol.natm))
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays of this size are required!
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lorb_dot_dgorb memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lorb_dot_dgorb 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: h1e and Feff terms
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1L)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2Lbuf,
                                mo_cas[p0:p1], mo_cas[q0:q1])
            # MRH: now contract the first two indices of the active-active 2RDM with L as you go from MOs to AOs
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf,
                                 moL_cas[p0:p1], mo_cas[q0:q1])
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf,
                                 mo_cas[p0:p1], moL_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            gc.collect()
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            # MRH: I still don't understand why there is a minus here!
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lorb_dot_dgorb atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: core-core and core-active 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1L[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1cL[:, p0:p1], dm1[p0:p1]) * 2
        # MRH: active-core 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1], dmL_core[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1aL[:, p0:p1], dm_core[p0:p1]) * 2

    # MRH: deleted the nuclear-nuclear part to avoid double-counting
    # lesson learned from debugging - mol.intor computes -1 * the derivative and only
    # for one index
    # on the other hand, mf_grad.hcore_generator computes the actual derivative of
    # h1 for both indices and with the correct sign

    logger.debug(mc, "Orb lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "Orb lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "Orb lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri

    return de
def get_jk(agf2, eri, rdm1, with_j=True, with_k=True):
    ''' Get the J/K matrices.

    Args:
        eri : ndarray or H5 dataset
            Electronic repulsion integrals (NOT as _ChemistsERIs). In
            the case of no bra/ket symmetry, a tuple can be passed.
        rdm1 : 2D array
            Reduced density matrix

    Kwargs:
        with_j : bool
            Whether to compute J. Default value is True
        with_k : bool
            Whether to compute K. Default value is True

    Returns:
        tuple of ndarrays corresponding to J and K, if either are
        not requested then they are set to None.
    '''

    nmo = rdm1.shape[0]
    npair = nmo * (nmo + 1) // 2
    naux = agf2.with_df.get_naoaux()
    vj = vk = None

    if with_j:
        # Off-diagonal elements are doubled so that contracting the packed
        # lower triangle accounts for both (i,j) and (j,i).
        rdm1_tril = lib.pack_tril(rdm1 + np.tril(rdm1, k=-1))
        vj = np.zeros((npair, ))

    if with_k:
        vk = np.zeros((nmo, nmo))

    fdrv = ao2mo._ao2mo.libao2mo.AO2MOnr_e2_drv
    fmmm = ao2mo._ao2mo.libao2mo.AO2MOmmm_bra_nr_s2
    ftrans = ao2mo._ao2mo.libao2mo.AO2MOtranse2_nr_s2

    if isinstance(eri, tuple):
        bra, ket = eri
    else:
        bra = ket = eri

    blksize = _agf2.get_blksize(agf2.max_memory,
                                (npair, npair, 1, nmo**2, nmo**2))
    blksize = min(nmo, max(BLKMIN, blksize))
    logger.debug1(agf2, 'blksize (dfragf2.get_jk) = %d' % blksize)

    # Scratch space is only needed by the exchange contraction
    buf = None
    if with_k:
        buf = (np.empty((blksize, nmo, nmo)), np.empty((blksize, nmo, nmo)))

    for p0, p1 in mpi_helper.prange(0, naux, blksize):
        bra0 = bra[p0:p1]
        ket0 = ket[p0:p1]

        if with_j:
            # rdm1_tril only exists when J is requested, so rho must be built
            # inside this branch (it used to raise NameError for with_j=False)
            rho = np.dot(ket0, rdm1_tril)
            vj += np.dot(rho, bra0)

        if with_k:
            buf1 = buf[0][:p1 - p0]
            fdrv(ftrans, fmmm,
                 buf1.ctypes.data_as(ctypes.c_void_p),
                 bra0.ctypes.data_as(ctypes.c_void_p),
                 rdm1.ctypes.data_as(ctypes.c_void_p),
                 ctypes.c_int(p1 - p0), ctypes.c_int(nmo),
                 (ctypes.c_int * 4)(0, nmo, 0, nmo),
                 lib.c_null_ptr(), ctypes.c_int(0))
            buf2 = lib.unpack_tril(ket0, out=buf[1])
            buf1 = buf1.reshape(-1, nmo)
            buf2 = buf2.reshape(-1, nmo)

            vk = lib.dot(buf1.T, buf2, c=vk, beta=1)

    if with_j:
        mpi_helper.barrier()
        mpi_helper.allreduce_safe_inplace(vj)
        mpi_helper.barrier()
        vj = lib.unpack_tril(vj)

    if with_k:
        mpi_helper.barrier()
        mpi_helper.allreduce_safe_inplace(vk)

    return vj, vk
def kernel(mp, t2, atmlst=None, mf_grad=None, verbose=logger.INFO):
    '''Nuclear gradients built from the MP2 amplitudes ``t2``.

    Args:
        mp : MP2 object (``mol``, ``_scf``, ``mo_coeff``, ``mo_occ``,
            ``frozen``, ``max_memory`` are read).
        t2 : doubles amplitudes over the active occupied/virtual orbitals.

    Kwargs:
        atmlst : atoms to differentiate; defaults to all atoms of ``mp.mol``.
        mf_grad : SCF gradient object; defaults to
            ``mp._scf.nuc_grad_method()``.
        verbose : verbosity level handed to the logger.

    Returns:
        ndarray of shape (len(atmlst), 3), including the nuclear repulsion
        contribution from ``mf_grad.grad_nuc``.
    '''
    if mf_grad is None:
        mf_grad = mp._scf.nuc_grad_method()

    log = logger.new_logger(mp, verbose)
    # time.clock() was removed in Python 3.8
    time0 = logger.process_clock(), logger.perf_counter()

    log.debug('Build mp2 rdm1 intermediates')
    d1 = mp2._gamma1_intermediates(mp, t2)
    doo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)

    # Set nocc, nvir for half-transformation of 2pdm.  Frozen orbitals are excluded.
    # nocc, nvir should be updated to include the frozen orbitals when proceeding
    # the 1-particle quantities later.
    mol = mp.mol
    # Compare by value: ``frozen is 0`` relies on int interning and does not
    # recognise numpy integer zeros.
    frozen = mp.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mp.get_frozen_mask(), mp.mo_occ)
    orbo = mp.mo_coeff[:, OA]
    orbv = mp.mo_coeff[:, VA]
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    part_dm2 = _ao2mo.nr_e2(t2.reshape(nocc**2, nvir**2),
                            numpy.asarray(orbv.T, order='F'), (0, nao, 0, nao),
                            's1', 's1').reshape(nocc, nocc, nao, nao)
    part_dm2 = (part_dm2.transpose(0, 2, 3, 1) * 4 -
                part_dm2.transpose(0, 3, 2, 1) * 2)

    hf_dm1 = mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements inside a packed lower triangle
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    de = numpy.zeros((len(atmlst), 3))
    Imat = numpy.zeros((nao, nao))
    fdm2 = lib.H5TmpFile()
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst), 3, nao, nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory * .9e6 / 8 / (nao**3 * 2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((3, nao, nao))
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = lib.einsum('pi,iqrj->pqrj', orbo[ip0:ip1], part_dm2)
            dm2buf += lib.einsum('qi,iprj->pqrj', orbo, part_dm2[:, ip0:ip1])
            dm2buf = lib.einsum('pqrj,sj->pqrs', dm2buf, orbo)
            dm2buf = dm2buf + dm2buf.transpose(0, 1, 3, 2)
            dm2buf = lib.pack_tril(
                dm2buf.reshape(-1, nao, nao)).reshape(nf, nao, -1)
            dm2buf[:, :, diagidx] *= .5

            shls_slice = (b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf, nao, -1), dm2buf)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3, nf, nao, -1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            dm2buf = None
            # HF part
            for i in range(3):
                # Flatten the two leading AO indices BEFORE unpacking: the
                # packed pair index must be the last axis of a 2D array.
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf * nao, -1))
                eri1tmp = eri1tmp.reshape(nf, nao, nao, nao)
                vhf[i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1[ip0:ip1]) * .5
                vhf[i, ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[i, ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1) * .5
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in ccsd_grad module.
    mo_coeff = mp.mo_coeff
    mo_energy = mp._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mp.mo_occ > 0)
    Imat = reduce(numpy.dot,
                  (mo_coeff.T, Imat, mp._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo, nmo))
    if with_frozen:
        dco = Imat[OF[:, None], OA] / (mo_energy[OF, None] - mo_energy[OA])
        dfv = Imat[VF[:, None], VA] / (mo_energy[VF, None] - mo_energy[VA])
        dm1mo[OA[:, None], OA] = doo + doo.T
        dm1mo[OF[:, None], OA] = dco
        dm1mo[OA[:, None], OF] = dco.T
        dm1mo[VA[:, None], VA] = dvv + dvv.T
        dm1mo[VF[:, None], VA] = dfv
        dm1mo[VA[:, None], VF] = dfv.T
    else:
        dm1mo[:nocc, :nocc] = doo + doo.T
        dm1mo[nocc:, nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mp._scf.get_veff(mp.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:, nocc:].T, vhf, mo_coeff[:, :nocc]))
    Xvo += Imat[:nocc, nocc:].T - Imat[nocc:, :nocc]

    dm1mo += _response_dm1(mp, Xvo)
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imat[nocc:, :nocc] = Imat[:nocc, nocc:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mp._scf.get_veff(mol, dm1 + dm1.T), p1))
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1p = hf_dm1 + dm1 * 2
    dm1 += hf_dm1
    zeta += mf_grad.make_rdm1e(mo_energy, mo_coeff, mp.mo_occ)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1])
        de[k] -= numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k], dm1p)

    de += mf_grad.grad_nuc(mol)
    log.timer('%s gradients' % mp.__class__.__name__, *time0)
    return de
def __init__(self, myci, mo_coeff=None, method='incore'):
    '''Transform and store the MO integral blocks used by CISD.

    Sets ``mo_coeff`` (frozen orbitals removed), ``fock`` and the integral
    blocks ``oooo``, ``ooov``, ``vooo``, ``voov``, ``vvoo``, ``vovv`` and
    ``vvvv`` (the latter is not built in the out-of-core branch when
    ``myci.direct`` is true).

    Args:
        myci : CISD object (``mo_occ``, ``frozen``, ``mo_coeff``, ``_scf``,
            ``mol``, ``nocc``, ``nmo``, ``max_memory``, ``direct`` are read).
        mo_coeff : MO coefficients including frozen orbitals; defaults to
            ``myci.mo_coeff``.
        method : 'incore' allows the in-memory transformation when the AO
            integrals are cached and memory suffices; any other value forces
            the HDF5-backed path unless ``myci.mol.incore_anyway`` is set.
    '''
    # time.clock() was removed in Python 3.8
    cput0 = (logger.process_clock(), logger.perf_counter())
    # numpy.bool was removed in NumPy 1.24; the builtin is equivalent
    moidx = numpy.ones(myci.mo_occ.size, dtype=bool)
    if isinstance(myci.frozen, (int, numpy.integer)):
        moidx[:myci.frozen] = False
    elif myci.frozen is not None and len(myci.frozen) > 0:
        # frozen=None means "nothing frozen"; len(None) would raise
        moidx[numpy.asarray(myci.frozen)] = False
    if mo_coeff is None:
        self.mo_coeff = mo_coeff = myci.mo_coeff[:, moidx]
    else:
        self.mo_coeff = mo_coeff = mo_coeff[:, moidx]
    dm = myci._scf.make_rdm1(myci.mo_coeff, myci.mo_occ)
    fockao = myci._scf.get_hcore() + myci._scf.get_veff(myci.mol, dm)
    self.fock = reduce(numpy.dot, (mo_coeff.T, fockao, mo_coeff))

    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    mem_incore, mem_outcore, mem_basic = ccsd._mem_usage(nocc, nvir)
    mem_now = lib.current_memory()[0]

    log = logger.Logger(myci.stdout, myci.verbose)
    if (method == 'incore' and myci._scf._eri is not None and
            (mem_incore + mem_now < myci.max_memory) or
            myci.mol.incore_anyway):
        eri1 = ao2mo.incore.full(myci._scf._eri, mo_coeff)
        #:eri1 = ao2mo.restore(1, eri1, nmo)
        #:self.oooo = eri1[:nocc,:nocc,:nocc,:nocc].copy()
        #:self.ooov = eri1[:nocc,:nocc,:nocc,nocc:].copy()
        #:self.vooo = eri1[nocc:,:nocc,:nocc,:nocc].copy()
        #:self.voov = eri1[nocc:,:nocc,:nocc,nocc:].copy()
        #:self.vvoo = eri1[nocc:,nocc:,:nocc,:nocc].copy()
        #:vovv = eri1[nocc:,:nocc,nocc:,nocc:].copy()
        #:self.vovv = lib.pack_tril(vovv.reshape(-1,nvir,nvir))
        #:self.vvvv = ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)
        nvir_pair = nvir * (nvir + 1) // 2
        self.oooo = numpy.empty((nocc, nocc, nocc, nocc))
        self.ooov = numpy.empty((nocc, nocc, nocc, nvir))
        self.vooo = numpy.empty((nvir, nocc, nocc, nocc))
        self.voov = numpy.empty((nvir, nocc, nocc, nvir))
        self.vovv = numpy.empty((nvir, nocc, nvir_pair))
        self.vvvv = numpy.empty((nvir_pair, nvir_pair))
        # ij walks the packed (i>=j) row index of eri1
        ij = 0
        outbuf = numpy.empty((nmo, nmo, nmo))
        oovv = numpy.empty((nocc, nocc, nvir, nvir))
        for i in range(nocc):
            buf = lib.unpack_tril(eri1[ij:ij + i + 1], out=outbuf[:i + 1])
            for j in range(i + 1):
                self.oooo[i, j] = self.oooo[j, i] = buf[j, :nocc, :nocc]
                self.ooov[i, j] = self.ooov[j, i] = buf[j, :nocc, nocc:]
                oovv[i, j] = oovv[j, i] = buf[j, nocc:, nocc:]
            ij += i + 1
        self.vvoo = lib.transpose(oovv.reshape(nocc**2, -1)).reshape(
            nvir, nvir, nocc, nocc)
        oovv = None
        # ij1 walks the packed virtual-pair row index of vvvv
        ij1 = 0
        for i in range(nocc, nmo):
            buf = lib.unpack_tril(eri1[ij:ij + i + 1], out=outbuf[:i + 1])
            self.vooo[i - nocc] = buf[:nocc, :nocc, :nocc]
            self.voov[i - nocc] = buf[:nocc, :nocc, nocc:]
            lib.pack_tril(_cp(buf[:nocc, nocc:, nocc:]),
                          out=self.vovv[i - nocc])
            dij = i - nocc + 1
            lib.pack_tril(_cp(buf[nocc:i + 1, nocc:, nocc:]),
                          out=self.vvvv[ij1:ij1 + dij])
            ij += i + 1
            ij1 += dij
    else:
        cput1 = logger.process_clock(), logger.perf_counter()
        self.feri1 = lib.H5TmpFile()
        orbo = mo_coeff[:, :nocc]
        orbv = mo_coeff[:, nocc:]
        nvpair = nvir * (nvir + 1) // 2
        self.oooo = self.feri1.create_dataset('oooo', (nocc, nocc, nocc, nocc), 'f8')
        self.ooov = self.feri1.create_dataset('ooov', (nocc, nocc, nocc, nvir), 'f8')
        self.vvoo = self.feri1.create_dataset('vvoo', (nvir, nvir, nocc, nocc), 'f8')
        self.vooo = self.feri1.create_dataset('vooo', (nvir, nocc, nocc, nocc), 'f8')
        self.voov = self.feri1.create_dataset('voov', (nvir, nocc, nocc, nvir), 'f8')
        self.vovv = self.feri1.create_dataset('vovv', (nvir, nocc, nvpair), 'f8')
        fsort = _ccsd.libcc.CCsd_sort_inplace
        nocc_pair = nocc * (nocc + 1) // 2
        nvir_pair = nvir * (nvir + 1) // 2

        def sort_inplace(p0, p1, eri):
            # Reorder each row in place, then slice it into vv / oo / ov parts
            fsort(eri.ctypes.data_as(ctypes.c_void_p),
                  ctypes.c_int(nocc), ctypes.c_int(nvir),
                  ctypes.c_int((p1 - p0) * nocc))
            vv = eri[:, :nvir_pair]
            oo = eri[:, nvir_pair:nvir_pair + nocc_pair]
            ov = eri[:, nvir_pair + nocc_pair:].reshape(-1, nocc, nvir)
            return oo, ov, vv

        buf = numpy.empty((nmo, nmo, nmo))
        oovv = numpy.empty((nocc, nocc, nvir, nvir))

        def save_occ_frac(p0, p1, eri):
            oo, ov, vv = sort_inplace(p0, p1, eri)
            self.oooo[p0:p1] = lib.unpack_tril(oo, out=buf).reshape(
                p1 - p0, nocc, nocc, nocc)
            self.ooov[p0:p1] = ov.reshape(p1 - p0, nocc, nocc, nvir)
            oovv[p0:p1] = lib.unpack_tril(vv, out=buf).reshape(
                p1 - p0, nocc, nvir, nvir)

        def save_vir_frac(p0, p1, eri):
            oo, ov, vv = sort_inplace(p0, p1, eri)
            self.vooo[p0:p1] = lib.unpack_tril(oo, out=buf).reshape(
                p1 - p0, nocc, nocc, nocc)
            self.voov[p0:p1] = ov.reshape(p1 - p0, nocc, nocc, nvir)
            self.vovv[p0:p1] = vv.reshape(p1 - p0, nocc, -1)

        if not myci.direct:
            max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
            self.feri2 = lib.H5TmpFile()
            ao2mo.full(myci.mol, orbv, self.feri2,
                       max_memory=max_memory, verbose=log)
            self.vvvv = self.feri2['eri_mo']
            cput1 = log.timer_debug1('transforming vvvv', *cput1)

        tmpfile3 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        with h5py.File(tmpfile3.name, 'w') as feri:
            max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
            # Virtuals first, so rows [0, nvir*nocc) are the (v o|..) block
            mo = numpy.hstack((orbv, orbo))
            ao2mo.general(myci.mol, (mo, orbo, mo, mo),
                          feri, max_memory=max_memory, verbose=log)
            cput1 = log.timer_debug1('transforming oppp', *cput1)
            blksize = max(
                1, int(min(8e9, max_memory * .5e6) / 8 / nmo**2 / nocc))
            handler = None
            for p0, p1 in lib.prange(0, nvir, blksize):
                eri = _cp(feri['eri_mo'][p0 * nocc:p1 * nocc])
                handler = async_do(handler, save_vir_frac, p0, p1, eri)
            for p0, p1 in lib.prange(0, nocc, blksize):
                eri = _cp(feri['eri_mo'][(p0 + nvir) * nocc:(p1 + nvir) * nocc])
                handler = async_do(handler, save_occ_frac, p0, p1, eri)
            if handler is not None:
                handler.join()
        self.vvoo[:] = lib.transpose(oovv.reshape(nocc**2, -1)).reshape(
            nvir, nvir, nocc, nocc)
    log.timer('CISD integral transformation', *cput0)
def restore(symmetry, eri, norb, tao=None):
    r'''Convert the 2e integrals (in Chemist's notation) between different
    level of permutation symmetry (8-fold, 4-fold, or no symmetry)

    Args:
        symmetry : int or str
            code to present the target symmetry of 2e integrals

            | 's8' or '8' or 8 : 8-fold symmetry
            | 's4' or '4' or 4 : 4-fold symmetry
            | 's1' or '1' or 1 : no symmetry
            | 's2ij' or '2ij' : symmetric ij pair for (ij|kl)
            | 's2kl' or '2kl' : symmetric kl pair for (ij|kl)

            Note the 4-fold symmetry requires (ij|kl) == (ji|kl) == (ij|lk)
            while (ij|kl) == (kl|ij) is not required.

        eri : ndarray
            The symmetry of eri is determined by the size of eri and norb
        norb : int
            The symmetry of eri is determined by the size of eri and norb
        tao : unused
            Accepted for interface compatibility; not read by this function.

    Returns:
        ndarray.  The shape depends on the target symmetry.

            | 8 : (norb*(norb+1)/2)*(norb*(norb+1)/2+1)/2
            | 4 : (norb*(norb+1)/2, norb*(norb+1)/2)
            | 1 : (norb, norb, norb, norb)
            | 2ij : (norb*(norb+1)/2, norb, norb)
            | 2kl : (norb, norb, norb*(norb+1)/2)

    Raises:
        ValueError: if the symmetry code is not recognised.
        RuntimeError: if the integrals are not double precision reals, or if
            the size of eri matches none of the supported layouts.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf.scf import _vhf
    >>> from pyscf import ao2mo
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> eri = mol.intor('int2e')
    >>> eri1 = ao2mo.restore(1, eri, mol.nao_nr())
    >>> eri4 = ao2mo.restore(4, eri, mol.nao_nr())
    >>> eri8 = ao2mo.restore(8, eri, mol.nao_nr())
    >>> print(eri1.shape)
    (7, 7, 7, 7)
    >>> print(eri4.shape)
    (28, 28)
    >>> print(eri8.shape)
    (406,)
    '''
    targetsym = _stand_sym_code(symmetry)
    if targetsym not in ('8', '4', '1', '2kl', '2ij'):
        raise ValueError('symmetry = %s' % symmetry)

    # Convert first so that array-likes without a .dtype attribute are
    # accepted; the dtype check below then applies to the converted array.
    eri = numpy.asarray(eri, order='C')
    if eri.dtype != numpy.double:
        raise RuntimeError('Complex integrals not supported')

    npair = norb * (norb + 1) // 2
    if eri.size == norb**4:  # s1
        if targetsym == '1':
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2kl':
            eri = lib.pack_tril(eri.reshape(norb**2, norb, norb))
            return eri.reshape(norb, norb, npair)
        elif targetsym == '2ij':
            eri = lib.pack_tril(eri.reshape(norb, norb, norb**2), axis=0)
            return eri.reshape(npair, norb, norb)
        else:
            return _convert('1', targetsym, eri, norb)

    elif eri.size == npair**2:  # s4
        if targetsym == '4':
            return eri.reshape(npair, npair)
        elif targetsym == '8':
            return lib.pack_tril(eri.reshape(npair, npair))
        elif targetsym == '2kl':
            return lib.unpack_tril(eri, lib.SYMMETRIC, axis=0)
        elif targetsym == '2ij':
            return lib.unpack_tril(eri, lib.SYMMETRIC, axis=-1)
        else:
            return _convert('4', targetsym, eri, norb)

    elif eri.size == npair * (npair + 1) // 2:  # 8-fold
        if targetsym == '8':
            return eri.ravel()
        elif targetsym == '4':
            return lib.unpack_tril(eri.ravel(), lib.SYMMETRIC)
        elif targetsym == '2kl':
            return lib.unpack_tril(lib.unpack_tril(eri.ravel()),
                                   lib.SYMMETRIC, axis=0)
        elif targetsym == '2ij':
            return lib.unpack_tril(lib.unpack_tril(eri.ravel()),
                                   lib.SYMMETRIC, axis=-1)
        else:
            return _convert('8', targetsym, eri, norb)

    # s2ij and s2kl have the same size; the position of the packed axis in
    # eri.shape tells them apart.
    elif eri.size == npair * norb**2 and eri.shape[0] == npair:  # s2ij
        if targetsym == '2ij':
            return eri.reshape(npair, norb, norb)
        elif targetsym == '8':
            eri = lib.pack_tril(eri.reshape(npair, norb, norb))
            return lib.pack_tril(eri)
        elif targetsym == '4':
            return lib.pack_tril(eri.reshape(npair, norb, norb))
        elif targetsym == '1':
            eri = lib.unpack_tril(eri.reshape(npair, norb**2),
                                  lib.SYMMETRIC, axis=0)
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2kl':
            tril2sq = lib.square_mat_in_trilu_indices(norb)
            trilidx = numpy.tril_indices(norb)
            eri = lib.take_2d(eri.reshape(npair, norb**2), tril2sq.ravel(),
                              trilidx[0] * norb + trilidx[1])
            return eri.reshape(norb, norb, npair)

    elif eri.size == npair * norb**2 and eri.shape[-1] == npair:  # s2kl
        if targetsym == '2kl':
            return eri.reshape(norb, norb, npair)
        elif targetsym == '8':
            eri = lib.pack_tril(eri.reshape(norb, norb, npair), axis=0)
            return lib.pack_tril(eri)
        elif targetsym == '4':
            return lib.pack_tril(eri.reshape(norb, norb, npair), axis=0)
        elif targetsym == '1':
            eri = lib.unpack_tril(eri.reshape(norb**2, npair),
                                  lib.SYMMETRIC, axis=-1)
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2ij':
            tril2sq = lib.square_mat_in_trilu_indices(norb)
            trilidx = numpy.tril_indices(norb)
            eri = lib.take_2d(eri.reshape(norb**2, npair),
                              trilidx[0] * norb + trilidx[1], tril2sq.ravel())
            return eri.reshape(npair, norb, norb)

    else:
        raise RuntimeError('eri.size = %d, norb = %d' % (eri.size, norb))
def get_pnucp(mydf, kpts=None):
    '''Build the matrix accumulated from the 'int3c2e_pvp1' analytic integrals
    and the 'GTO_ft_pdotp' Fourier-space contraction with the nuclear charges.

    Args:
        mydf : density-fitting-like object (``cell``, ``mesh``, ``eta``,
            ``max_memory``, ``ft_loop``, ``_int_nuc_vloc`` are used).
        kpts : a single k-point of shape (3,), an (nkpts, 3) array, or None
            for the zero k-point.

    Returns:
        ndarray: one (nao, nao) matrix when ``kpts`` is None or has shape
        (3,), otherwise a stack of matrices, one per k-point.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1, 3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1, 3))

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8
    t1 = (logger.process_clock(), logger.perf_counter())

    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2

    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh)
    charge = -cell.atom_charges()
    kpt_allow = numpy.zeros(3)
    coulG = tools.get_coulG(cell, kpt_allow, mesh=mydf.mesh, Gv=Gv)
    coulG *= kws
    if mydf.eta == 0:
        SI = cell.get_SI(Gv)
        vG = numpy.einsum('i,ix->x', charge, SI) * coulG
        wj = numpy.zeros((nkpts, nao_pair), dtype=numpy.complex128)
    else:
        # Auxiliary cell carrying one s-type Gaussian of exponent eta on each
        # atom; its exponent and normalization are appended to the env array.
        nuccell = copy.copy(cell)
        half_sph_norm = .5 / numpy.sqrt(numpy.pi)
        norm = half_sph_norm / mole.gaussian_int(2, mydf.eta)
        chg_env = [mydf.eta, norm]
        ptr_eta = cell._env.size
        ptr_norm = ptr_eta + 1
        chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0]
                   for ia in range(cell.natm)]
        nuccell._atm = cell._atm
        nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32)
        nuccell._env = numpy.hstack((cell._env, chg_env))

        wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst, 'int3c2e_pvp1'))
        t1 = log.timer_debug1('pnucp pass1: analytic int', *t1)

        aoaux = ft_ao.ft_ao(nuccell, Gv)
        vG = numpy.einsum('i,xi->x', charge, aoaux) * coulG

        # This correction reads the exponents of nuccell, which exists only
        # in the eta != 0 branch.
        if cell.dimension == 3:
            nucbar = sum([z / nuccell.bas_exp(i)[0]
                          for i, z in enumerate(charge)])
            nucbar *= numpy.pi / cell.vol

            ovlp = cell.pbc_intor('int1e_kin', 1, lib.HERMITIAN, kpts_lst)
            for k in range(nkpts):
                s = lib.pack_tril(ovlp[k])
                # *2 due to the factor 1/2 in T
                wj[k] -= nucbar * 2 * s

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    for aoaoks, p0, p1 in mydf.ft_loop(mydf.mesh, kpt_allow, kpts_lst,
                                       max_memory=max_memory, aosym='s2',
                                       intor='GTO_ft_pdotp'):
        for k, aoao in enumerate(aoaoks):
            if aft_jk.gamma_point(kpts_lst[k]):
                # Only the real part is kept at the gamma point (see below),
                # so accumulate real*real + imag*imag directly.
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].real, aoao.real)
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].imag, aoao.imag)
            else:
                wj[k] += numpy.einsum('k,kx->x', vG[p0:p1].conj(), aoao)
    t1 = log.timer_debug1('contracting pnucp', *t1)

    wj_kpts = []
    for k, kpt in enumerate(kpts_lst):
        if aft_jk.gamma_point(kpt):
            wj_kpts.append(lib.unpack_tril(wj[k].real.copy()))
        else:
            wj_kpts.append(lib.unpack_tril(wj[k]))

    if kpts is None or numpy.shape(kpts) == (3,):
        wj_kpts = wj_kpts[0]
    return numpy.asarray(wj_kpts)
def gamma2_incore(mycc, t1, t2, l1, l2):
    '''Build the two-particle density-matrix intermediates in memory from the
    amplitudes (t1, t2) and the lambda amplitudes (l1, l2).

    The ``#:`` comments give the einsum expression that the following
    lib.dot / transpose sequence evaluates.

    Args:
        mycc : CC object; not read by this function.
        t1, l1 : arrays of shape (nocc, nvir).
        t2, l2 : arrays reshaped here as (nocc, nocc, nvir, nvir).

    Returns:
        tuple (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov).
        dvvvv has shape (nvir_pair, nvir_pair) with
        nvir_pair = nvir*(nvir+1)//2; dvvov is a transposed view of dovvv.
    '''
    nocc, nvir = t1.shape
    nov = nocc * nvir

    #:theta = make_theta(t2)
    #:mOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2)
    #:mOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2)
    #:mOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
    l2a = numpy.empty((nocc,nvir,nocc,nvir))
    t2a = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        l2a[i] = l2[i].transpose(2,0,1)
        t2a[i] = t2[i].transpose(2,0,1)
    mOvOv = lib.dot(t2a.reshape(-1,nov), l2a.reshape(-1,nov).T).reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    mOVov = lib.dot(t2a.reshape(-1,nov), l2a.reshape(-1,nov).T, -1).reshape(nocc,nvir,nocc,nvir)
    # theta reuses the t2a buffer
    theta = t2a
    for i in range(nocc):
        l2a[i] = l2[i].transpose(1,0,2)
        theta[i] *= 2
        theta[i] -= t2[i].transpose(2,0,1)
    lib.dot(theta.reshape(-1,nov), l2a.reshape(nov,-1).T, 1, mOVov.reshape(nov,-1), 1)
    theta = l2a = t2a = None
    moo =(numpy.einsum('jdld->jl', mOvOv) * 2 +
          numpy.einsum('jdld->jl', mOVov))
    mvv =(numpy.einsum('lbld->bd', mOvOv) * 2 +
          numpy.einsum('lbld->bd', mOVov))
    mia =(numpy.einsum('kc,ikac->ia', l1, t2) * 2 -
          numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo*.5

    tau = _ccsd.make_tau(t2, t1, t1)
    #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
    goooo = lib.dot(tau.reshape(-1,nvir**2), l2.reshape(-1,nvir**2).T, .5)
    goooo = goooo.reshape(-1,nocc,nocc,nocc)
    doooo = _cp(make_theta(goooo).transpose(0,2,1,3))

    #:gooov -= numpy.einsum('ib,kjab->jkia', l1, tau)
    #:gooov -= numpy.einsum('kjab,ib->jkia', l2, t1)
    #:gooov += numpy.einsum('jkil,la->jkia', goooo, t1*2)
    gooov = lib.dot(_cp(tau.reshape(-1,nvir)), l1.T, -1)
    lib.dot(_cp(l2.reshape(-1,nvir)), t1.T, -1, gooov, 1)
    gooov = gooov.reshape(nocc,nocc,nvir,nocc)
    tmp = numpy.einsum('ji,ka->jkia', moo*-.5, t1)
    tmp += gooov.transpose(1,0,3,2)
    gooov, tmp = tmp, None
    lib.dot(goooo.reshape(-1,nocc), t1, 2, gooov.reshape(-1,nvir), 1)

    goovv = numpy.einsum('ia,jb->ijab', mia, t1)
    for i in range(nocc):
        goovv[i] += .5 * l2 [i]
        goovv[i] += .5 * tau[i]
    #:goovv -= numpy.einsum('jk,kiba->jiba', mij, tau)
    lib.dot(mij, tau.reshape(nocc,-1), -1, goovv.reshape(nocc,-1), 1)
    #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2)
    #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
    lib.dot(t2.reshape(-1,nvir), mab, -1, goovv.reshape(-1,nvir), 1)
    lib.dot(tau.reshape(-1,nvir), mvv.T, -.5, goovv.reshape(-1,nvir), 1)
    tau = None

    #:gooov += numpy.einsum('jaic,kc->jkia', mOvOv, t1)
    #:gooov -= numpy.einsum('kaic,jc->jkia', mOVov, t1)
    tmp = lib.dot(mOvOv.reshape(-1,nvir), t1.T).reshape(nocc,-1,nocc,nocc)
    gooov += tmp.transpose(0,3,2,1)
    lib.dot(t1, mOVov.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1), 0)
    gooov -= tmp.reshape(nocc,nocc,nvir,nocc).transpose(0,1,3,2)
    dooov = gooov.transpose(0,2,1,3)*2 - gooov.transpose(1,2,0,3)
    gooov = None
    #:tmp = numpy.einsum('ikac,jc->jaik', l2, t1)
    #:gOvVo -= numpy.einsum('jaik,kb->jabi', tmp, t1)
    #:gOvvO = numpy.einsum('jaki,kb->jabi', tmp, t1) + mOvOv.transpose(0,3,1,2)
    tmp = tmp.reshape(nocc,nocc,nocc,nvir)
    lib.dot(t1, l2.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
    gOvVo = numpy.einsum('ia,jb->jabi', l1, t1)
    gOvvO = numpy.empty((nocc,nvir,nvir,nocc))
    for i in range(nocc):
        gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0,2,1).reshape(-1,nocc)),
                            t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvVo[i] += mOVov[i].transpose(2,0,1)
        gOvvO[i] = lib.dot(tmp[i].reshape(nocc,-1).T,
                           t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvvO[i] += mOvOv[i].transpose(2,0,1)
    tmp = None

    dovvo = numpy.empty((nocc,nvir,nvir,nocc))
    doovv = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        tmp = gOvVo[i] * 2 + gOvvO[i]
        dovvo[i] = tmp.transpose(1,0,2)
        tmp = gOvvO[i] * -2 - gOvVo[i]
        doovv[i] = tmp.transpose(2,0,1)
    gOvvO = gOvVo = None

    tau2 = _ccsd.make_tau(t2, t1, t1)
    #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
    lib.dot(goooo.reshape(nocc*nocc,-1), tau2.reshape(-1,nvir**2), 1,
            goovv.reshape(-1,nvir**2), 1)
    tau2 += numpy.einsum('ia,jb->ijab', t1, t1)
    tau2p = tau2.reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        tau2p[i] = tau2[i].transpose(2,0,1)
    tau2, tau2p = tau2p.reshape(nov,-1), None
    #:goovv += numpy.einsum('ibld,jlda->ijab', mOvOv, tau2) * .5
    #:goovv -= numpy.einsum('iald,jldb->ijab', mOVov, tau2) * .5
    tmp = lib.dot(mOvOv.reshape(-1,nov), tau2.T, .5).reshape(nocc,nvir,-1,nvir)
    for i in range(nocc):
        tmp[i] = goovv[i].transpose(1,0,2) + tmp[i].transpose(2,1,0)
    goovv, tmp = tmp, None
    lib.dot(mOVov.reshape(-1,nov), tau2.T, -.5, goovv.reshape(nov,-1), 1)

    #:goovv += numpy.einsum('iald,jlbd->ijab', mOVov*2+mOvOv, t2) * .5
    t2a, tau2 = tau2.reshape(nocc,nvir,nocc,nvir), None
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    tmp = mOVov*2
    tmp += mOvOv
    lib.dot(tmp.reshape(-1,nov), t2a.reshape(nov,-1), .5, goovv.reshape(nov,-1), 1)
    t2a = tmp = None
    for i in range(nocc):
        goovv[i] = goovv[i] * 2 - goovv[i].transpose(2,1,0)
    dovov = goovv
    goooo = goovv = None

    #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
    mOVov = lib.transpose(mOVov.reshape(nov,-1))
    gvovv = lib.dot(mOVov.reshape(nocc,-1).T, t1).reshape(nvir,nocc,nvir,nvir)
    mOVov = None
    tmp = numpy.einsum('ja,jb->ab', l1, t1)
    #:gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
    #:gvovv += numpy.einsum('ba,ic->aibc', mvv, t1*.5)
    for i in range(nvir):
        gvovv[i] += numpy.einsum('b,ic->icb', tmp[i], t1)
        gvovv[i] += numpy.einsum('b,ic->icb', mvv[:,i]*.5, t1)
        gvovv[i] = gvovv[i].transpose(0,2,1)

    #:gvovv += numpy.einsum('ja,jibc->aibc', l1, t2)
    #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1)
    #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
    mOvOv = lib.transpose(mOvOv.reshape(nov,-1))
    lib.dot(mOvOv.reshape(nocc,-1).T, t1, -1, gvovv.reshape(-1,nvir), 1)
    mOvOv = None
    lib.dot(l1.T, t2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)
    lib.dot(t1.T, l2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)

    tmp = numpy.empty((nocc,nvir,nvir))
    for i in range(nvir):
        #:gvovv*2 - gvovv.transpose(0,1,3,2)
        gvovv[i] = _ccsd.make_021(gvovv[i], gvovv[i], 2, -1, out=tmp)

    #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2, t2)*.5
    #:jabc = numpy.einsum('ijab,ic->jabc', l2, t1) * .5
    #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
    #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
    tau = _ccsd.make_tau(t2, t1, t1)
    theta = make_theta(tau)
    tau = None
    l2tmp = lib.pack_tril(l2.reshape(-1,nvir,nvir))
    gtmp = lib.dot(l2tmp.T, theta.reshape(nocc**2,-1), .5).reshape(-1,nvir,nvir)
    l2tmp = theta = None
    nvir_pair = nvir * (nvir+1) //2
    tmp = numpy.empty((nvir,nvir,nvir))
    tmp1 = numpy.empty((nvir,nvir,nvir))
    tmptril = numpy.empty((nvir,nvir_pair))

    dvvvv = numpy.empty((nvir_pair,nvir_pair))
    dovvv = numpy.empty((nocc,nvir,nvir,nvir))
    # dvvov = (gvovv*2 - gvovv.transpose(0,1,3,2)).transpose(0,2,1,3)
    # dovvv = dvvov.transpose(2,3,0,1)
    # p0 is the first packed-pair row whose larger index equals i
    p0 = 0
    for i in range(nvir):
        # Rebuild the full slab for virtual index i from the packed gtmp rows
        tmp[:i+1] = gtmp[p0:p0+i+1]
        for j in range(i+1, nvir):
            tmp[j] = gtmp[j*(j+1)//2+i].T
        lib.dot(t1, tmp.reshape(nvir,-1), -2, gvovv[i].reshape(nocc,-1), 1)
        dovvv[:,:,i] = gvovv[i].transpose(0,2,1)

        #:gvvvv[i] = (tmp*2-tmp.transpose(0,2,1)).transpose(1,0,2)
        #:gvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(1,0,3,2))
        tmp1[:] = tmp.transpose(1,0,2)
        _ccsd.precontract(tmp1, diag_fac=2, out=tmptril)
        # Rows p0:p0+i were written by earlier iterations (the j > i loop
        # below); combine them with this iteration's contribution.
        dvvvv[p0:p0+i] += tmptril[:i]
        dvvvv[p0:p0+i] *= .25
        dvvvv[i*(i+1)//2+i] = tmptril[i] * .5
        for j in range(i+1, nvir):
            dvvvv[j*(j+1)//2+i] = tmptril[j]
        p0 += i + 1
    gtmp = tmp = tmp1 = tmptril = gvovv = None
    dvvov = dovvv.transpose(2,3,0,1)
    return (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)
def _gamma2_outcore(myci, civec, nmo, nocc, h5fobj, compress_vvvv=False):
    '''Build the CISD 2-particle density matrix blocks and store them in h5fobj.

    Args:
        myci : CISD object.  Its ``nocc``, ``nmo``, ``max_memory``, ``stdout``,
            ``verbose`` attributes and ``cisdvec_to_amplitudes`` method are used.
        civec : CISD vector, split into (c0, c1, c2) by
            ``myci.cisdvec_to_amplitudes``.
        nmo, nocc : kept for interface compatibility.  Both are overwritten
            by ``myci.nmo`` / ``myci.nocc`` on entry.
        h5fobj : HDF5 file/group-like object that receives the datasets
            'dovov', 'doooo', 'dooov', 'dovvv', 'dvvvv', 'doovv', 'dovvo'.
        compress_vvvv : if True, 'dvvvv' is stored symmetrized with shape
            (nvir_pair, nvir_pair) instead of (nvir, nvir, nvir, nvir).

    Returns:
        Tuple (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov) of the
        datasets held by h5fobj; ``dvvov`` is always None.
    '''
    log = logger.Logger(myci.stdout, myci.verbose)
    # The nocc/nmo arguments are superseded by the values stored on myci.
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nvir_pair = nvir * (nvir+1) // 2
    c0, c1, c2 = myci.cisdvec_to_amplitudes(civec, nmo, nocc)

    h5fobj['dovov'] = (2*c0*c2.conj().transpose(0,2,1,3) -
                       c0*c2.conj().transpose(1,2,0,3))

    doooo = lib.einsum('ijab,klab->ijkl', c2.conj(), c2)
    h5fobj['doooo'] = doooo.transpose(0,2,1,3) - doooo.transpose(1,2,0,3)*.5
    doooo = None

    dooov =-lib.einsum('ia,klac->klic', c1*2, c2.conj())
    h5fobj['dooov'] = dooov.transpose(0,2,1,3)*2 - dooov.transpose(1,2,0,3)
    dooov = None

    #:dvovv = numpy.einsum('ia,ikcd->akcd', c1, c2) * 2
    #:dvvvv = lib.einsum('ijab,ijcd->abcd', c2, c2)
    # Choose how many virtual orbitals are processed per block from the
    # memory (in MB) that is still available.
    max_memory = max(0, myci.max_memory - lib.current_memory()[0])
    unit = max(nocc**2*nvir*2+nocc*nvir**2*3 + 1, nvir**3*2+nocc*nvir**2 + 1)
    blksize = min(nvir, max(BLKMIN, int(max_memory*.9e6/8/unit)))
    # The second value logged is nvir, matching the message text.
    log.debug1('rdm intermediates: block size = %d, nvir = %d in %d blocks',
               blksize, nvir, int((nvir+blksize-1)/blksize))
    dtype = numpy.result_type(civec).char
    dovvv = h5fobj.create_dataset('dovvv', (nocc,nvir,nvir,nvir), dtype,
                                  chunks=(nocc,min(nocc,nvir),1,nvir))
    if compress_vvvv:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair,nvir_pair), dtype)
    else:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir,nvir,nvir,nvir), dtype)

    for p0, p1 in lib.prange(0, nvir, blksize):
        theta = c2[:,:,p0:p1] - c2[:,:,p0:p1].transpose(1,0,2,3) * .5
        gvvvv = lib.einsum('ijab,ijcd->abcd', theta.conj(), c2)
        if compress_vvvv:
            # symmetrize dvvvv because it does not affect the results of cisd_grad
            # dvvvv = (dvvvv+dvvvv.transpose(0,1,3,2)) * .5
            # dvvvv = (dvvvv+dvvvv.transpose(1,0,2,3)) * .5
            # now dvvvv == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
            # NOTE(review): both work buffers use numpy's default float64
            # dtype regardless of `dtype` above.
            tmp = numpy.empty((nvir,nvir,nvir))
            tmpvvvv = numpy.empty((p1-p0,nvir,nvir_pair))
            for i in range(p1-p0):
                tmp[:] = gvvvv[i].conj().transpose(1,0,2)
                lib.pack_tril(tmp+tmp.transpose(0,2,1), out=tmpvvvv[i])
            # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
            for i in range(p0, p1):
                for j in range(p0, i):
                    tmpvvvv[i-p0,j] += tmpvvvv[j-p0,i]
                tmpvvvv[i-p0,i] *= 2
            # Rows belonging to later blocks: park the partial contribution
            # in the dataset; it is picked up again when that block is reached.
            for i in range(p1, nvir):
                off = i * (i+1) // 2
                dvvvv[off+p0:off+p1] = tmpvvvv[:,i]
            for i in range(p0, p1):
                off = i * (i+1) // 2
                if p0 > 0:
                    # add what earlier blocks parked for this row
                    tmpvvvv[i-p0,:p0] += dvvvv[off:off+p0]
                dvvvv[off:off+i+1] = tmpvvvv[i-p0,:i+1] * .25
            tmp = tmpvvvv = None
        else:
            for i in range(p0, p1):
                dvvvv[i] = gvvvv[i-p0].conj().transpose(1,0,2)

        gvovv = numpy.einsum('ia,ikcd->akcd', c1[:,p0:p1].conj()*2, c2)
        gvovv = gvovv.conj()
        dovvv[:,:,p0:p1] = gvovv.transpose(1,3,0,2)*2 - gvovv.transpose(1,2,0,3)

    theta = c2*2 - c2.transpose(1,0,2,3)
    doovv = numpy.einsum('ia,kc->ikca', c1.conj(), -c1)
    doovv -= lib.einsum('kjcb,kica->jiab', c2.conj(), theta)
    doovv -= lib.einsum('ikcb,jkca->ijab', c2.conj(), theta)
    h5fobj['doovv'] = doovv
    doovv = None

    dovvo = lib.einsum('ikac,jkbc->iabj', theta.conj(), theta)
    dovvo += numpy.einsum('ia,kc->iack', c1.conj(), c1) * 2
    h5fobj['dovvo'] = dovvo
    theta = dovvo = None

    dvvov = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'], h5fobj['doovv'],
            h5fobj['dovvo'], dvvov          , h5fobj['dovvv'], h5fobj['dooov'])
def __init__(self, myci, mo_coeff=None, method='incore'):
    '''Transform the AO integrals to the MO basis and store them by block.

    Sets ``self.mo_coeff`` (frozen orbitals removed), ``self.fock`` and the
    integral blocks ``oooo``, ``ooov``, ``vooo``, ``voov``, ``vvoo``, ``vovv``
    and ``vvvv``, either as numpy arrays (incore) or as HDF5 datasets backed
    by temporary files (outcore).

    Args:
        myci : CISD object; ``mo_occ``, ``frozen``, ``mo_coeff``, ``_scf``,
            ``mol``, ``nocc``, ``nmo``, ``max_memory``, ``direct``, ``stdout``
            and ``verbose`` are read.
        mo_coeff : orbital coefficients; ``myci.mo_coeff`` when None.
        method : 'incore' requests the in-memory path (subject to the
            memory test below).
    '''
    # NOTE(review): time.clock() was removed in Python 3.8; it is kept here
    # because the logger's timer is fed with the same pair -- confirm.
    cput0 = (time.clock(), time.time())
    # Builtin bool instead of the numpy.bool alias (removed in NumPy 1.24).
    moidx = numpy.ones(myci.mo_occ.size, dtype=bool)
    if isinstance(myci.frozen, (int, numpy.integer)):
        moidx[:myci.frozen] = False
    elif len(myci.frozen) > 0:
        moidx[numpy.asarray(myci.frozen)] = False
    if mo_coeff is None:
        self.mo_coeff = mo_coeff = myci.mo_coeff[:,moidx]
    else:
        self.mo_coeff = mo_coeff = mo_coeff[:,moidx]
    # The Fock matrix is built from the full (unfrozen) SCF density.
    dm = myci._scf.make_rdm1(myci.mo_coeff, myci.mo_occ)
    fockao = myci._scf.get_hcore() + myci._scf.get_veff(myci.mol, dm)
    self.fock = reduce(numpy.dot, (mo_coeff.T, fockao, mo_coeff))

    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    mem_incore, mem_outcore, mem_basic = ccsd._mem_usage(nocc, nvir)
    mem_now = lib.current_memory()[0]

    log = logger.Logger(myci.stdout, myci.verbose)
    # `and` binds tighter than `or`: incore_anyway alone selects this branch.
    if (method == 'incore' and myci._scf._eri is not None and
        (mem_incore+mem_now < myci.max_memory) or myci.mol.incore_anyway):
        eri1 = ao2mo.incore.full(myci._scf._eri, mo_coeff)
        #:eri1 = ao2mo.restore(1, eri1, nmo)
        #:self.oooo = eri1[:nocc,:nocc,:nocc,:nocc].copy()
        #:self.ooov = eri1[:nocc,:nocc,:nocc,nocc:].copy()
        #:self.vooo = eri1[nocc:,:nocc,:nocc,:nocc].copy()
        #:self.voov = eri1[nocc:,:nocc,:nocc,nocc:].copy()
        #:self.vvoo = eri1[nocc:,nocc:,:nocc,:nocc].copy()
        #:vovv = eri1[nocc:,:nocc,nocc:,nocc:].copy()
        #:self.vovv = lib.pack_tril(vovv.reshape(-1,nvir,nvir))
        #:self.vvvv = ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)
        nvir_pair = nvir * (nvir+1) // 2
        self.oooo = numpy.empty((nocc,nocc,nocc,nocc))
        self.ooov = numpy.empty((nocc,nocc,nocc,nvir))
        self.vooo = numpy.empty((nvir,nocc,nocc,nocc))
        self.voov = numpy.empty((nvir,nocc,nocc,nvir))
        self.vovv = numpy.empty((nvir,nocc,nvir_pair))
        self.vvvv = numpy.empty((nvir_pair,nvir_pair))
        # ij walks the packed lower-triangular pair index of eri1.
        ij = 0
        outbuf = numpy.empty((nmo,nmo,nmo))
        oovv = numpy.empty((nocc,nocc,nvir,nvir))
        for i in range(nocc):
            buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            for j in range(i+1):
                self.oooo[i,j] = self.oooo[j,i] = buf[j,:nocc,:nocc]
                self.ooov[i,j] = self.ooov[j,i] = buf[j,:nocc,nocc:]
                oovv[i,j] = oovv[j,i] = buf[j,nocc:,nocc:]
            ij += i + 1
        self.vvoo = lib.transpose(oovv.reshape(nocc**2,-1)).reshape(nvir,nvir,nocc,nocc)
        oovv = None
        # ij continues into the virtual rows; ij1 indexes packed vv pairs.
        ij1 = 0
        for i in range(nocc,nmo):
            buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            self.vooo[i-nocc] = buf[:nocc,:nocc,:nocc]
            self.voov[i-nocc] = buf[:nocc,:nocc,nocc:]
            lib.pack_tril(_cp(buf[:nocc,nocc:,nocc:]), out=self.vovv[i-nocc])
            dij = i - nocc + 1
            lib.pack_tril(_cp(buf[nocc:i+1,nocc:,nocc:]),
                          out=self.vvvv[ij1:ij1+dij])
            ij += i + 1
            ij1 += dij
    else:
        cput1 = time.clock(), time.time()
        _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        _tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        self.feri1 = feri1 = h5py.File(_tmpfile1.name)
        def __del__feri1(self):
            feri1.close()
        self.feri1.__del__ = __del__feri1
        orbo = mo_coeff[:,:nocc]
        orbv = mo_coeff[:,nocc:]
        nvpair = nvir * (nvir+1) // 2
        self.oooo = self.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
        self.ooov = self.feri1.create_dataset('ooov', (nocc,nocc,nocc,nvir), 'f8')
        self.vvoo = self.feri1.create_dataset('vvoo', (nvir,nvir,nocc,nocc), 'f8')
        self.vooo = self.feri1.create_dataset('vooo', (nvir,nocc,nocc,nocc), 'f8')
        self.voov = self.feri1.create_dataset('voov', (nvir,nocc,nocc,nvir), 'f8')
        self.vovv = self.feri1.create_dataset('vovv', (nvir,nocc,nvpair), 'f8')
        fsort = _ccsd.libcc.CCsd_sort_inplace
        nocc_pair = nocc*(nocc+1)//2
        nvir_pair = nvir*(nvir+1)//2
        def sort_inplace(p0, p1, eri):
            # Reorder eri in place via the C routine, then slice out the
            # vv / oo / ov column ranges.
            fsort(eri.ctypes.data_as(ctypes.c_void_p),
                  ctypes.c_int(nocc), ctypes.c_int(nvir),
                  ctypes.c_int((p1-p0)*nocc))
            vv = eri[:,:nvir_pair]
            oo = eri[:,nvir_pair:nvir_pair+nocc_pair]
            ov = eri[:,nvir_pair+nocc_pair:].reshape(-1,nocc,nvir)
            return oo, ov, vv
        # buf is the shared unpack scratch for both save_* callbacks.
        buf = numpy.empty((nmo,nmo,nmo))
        oovv = numpy.empty((nocc,nocc,nvir,nvir))
        def save_occ_frac(p0, p1, eri):
            oo, ov, vv = sort_inplace(p0, p1, eri)
            self.oooo[p0:p1] = lib.unpack_tril(oo, out=buf).reshape(p1-p0,nocc,nocc,nocc)
            self.ooov[p0:p1] = ov.reshape(p1-p0,nocc,nocc,nvir)
            oovv[p0:p1] = lib.unpack_tril(vv, out=buf).reshape(p1-p0,nocc,nvir,nvir)
        def save_vir_frac(p0, p1, eri):
            oo, ov, vv = sort_inplace(p0, p1, eri)
            self.vooo[p0:p1] = lib.unpack_tril(oo, out=buf).reshape(p1-p0,nocc,nocc,nocc)
            self.voov[p0:p1] = ov.reshape(p1-p0,nocc,nocc,nvir)
            self.vovv[p0:p1] = vv.reshape(p1-p0,nocc,-1)

        # self.vvvv is only produced here when the calculation is not direct.
        if not myci.direct:
            max_memory = max(2000,myci.max_memory-lib.current_memory()[0])
            self.feri2 = feri2 = h5py.File(_tmpfile2.name)
            def __del__feri2(self):
                feri2.close()
            self.feri2.__del__ = __del__feri2
            ao2mo.full(myci.mol, orbv, self.feri2, max_memory=max_memory, verbose=log)
            self.vvvv = self.feri2['eri_mo']
            cput1 = log.timer_debug1('transforming vvvv', *cput1)

        tmpfile3 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        with h5py.File(tmpfile3.name, 'w') as feri:
            max_memory = max(2000, myci.max_memory-lib.current_memory()[0])
            # Virtual orbitals first, so rows [0, nvir*nocc) are the v* blocks.
            mo = numpy.hstack((orbv, orbo))
            ao2mo.general(myci.mol, (mo,orbo,mo,mo),
                          feri, max_memory=max_memory, verbose=log)
            cput1 = log.timer_debug1('transforming oppp', *cput1)
            blksize = max(1, int(min(8e9,max_memory*.5e6)/8/nmo**2/nocc))
            handler = None
            for p0, p1 in lib.prange(0, nvir, blksize):
                eri = _cp(feri['eri_mo'][p0*nocc:p1*nocc])
                handler = async_do(handler, save_vir_frac, p0, p1, eri)
            for p0, p1 in lib.prange(0, nocc, blksize):
                eri = _cp(feri['eri_mo'][(p0+nvir)*nocc:(p1+nvir)*nocc])
                handler = async_do(handler, save_occ_frac, p0, p1, eri)
            if handler is not None:
                handler.join()
        self.vvoo[:] = lib.transpose(oovv.reshape(nocc**2,-1)).reshape(nvir,nvir,nocc,nocc)
    log.timer('CISD integral transformation', *cput0)
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Compute nuclear gradients for a CASCI-type object ``mc``.

    Args:
        mc : object providing ``_scf``, ``ci``, ``mol``, ``ncore``, ``ncas``,
            ``nelecas``, ``fcisolver``, ``get_hcore`` and ``max_memory``.
        mo_coeff : orbital coefficients; ``mc._scf.mo_coeff`` when None.
        ci : CI vector (must be a numpy.ndarray); ``mc.ci`` when None.
        atmlst : atom indices to differentiate; all atoms when None.
        mf_grad : mean-field gradient object; built from
            ``mc._scf.nuc_grad_method()`` when None.
        verbose : not used in this function body.

    Returns:
        ndarray of shape (len(atmlst), 3), including the nuclear repulsion
        gradient from ``mf_grad.grad_nuc``.
    '''
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    assert(isinstance(ci, numpy.ndarray))

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    mo_energy = mc._scf.mo_energy

    # NOTE: mo_occ here is the block of core+active orbital coefficients,
    # not an occupation-number vector.
    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]

    # Equal numbers of alpha and beta electrons are required.
    neleca, nelecb = mol.nelec
    assert(neleca == nelecb)
    orbo = mo_coeff[:,:neleca]
    orbv = mo_coeff[:,neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nmo,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo,nmo))
    Imat[:,:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active columns written above are overwritten here.
    Imat[:,ncore:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:,ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # Orbital-energy differences ee[p,q] = e_p - e_q.
    ee = mo_energy[:,None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    # Blocks of zvec are filled directly as -Imat/ee; the remaining
    # [neleca:, :neleca] block is obtained from cphf.solve below.
    zvec[:ncore,ncore:neleca] = Imat[:ncore,ncore:neleca] / -ee[:ncore,ncore:neleca]
    zvec[ncore:neleca,:ncore] = Imat[ncore:neleca,:ncore] / -ee[ncore:neleca,:ncore]
    zvec[nocc:,neleca:nocc] = Imat[nocc:,neleca:nocc] / -ee[nocc:,neleca:nocc]
    zvec[neleca:nocc,nocc:] = Imat[neleca:nocc,nocc:] / -ee[neleca:nocc,nocc:]

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:,:neleca] - Imat[:neleca,neleca:].T
    def fvind(x):
        # Response function handed to cphf.solve: veff of the symmetrized
        # density built from x, projected onto the orbv/orbo block.
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2
    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo, max_cycle=30)[0]
    zvec[neleca:,:neleca] = dm1resp

    # zeta scales the columns of zvec by the orbital energies.
    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:,:neleca], mo_coeff[:,:neleca].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mc._scf.get_veff(mol, zvec_ao), p1))

    # Zero the Imat blocks already used for zvec above, then copy the
    # transposed [:neleca, neleca:] block into [neleca:, :neleca].
    Imat[:ncore,ncore:neleca] = 0
    Imat[ncore:neleca,:ncore] = 0
    Imat[nocc:,neleca:nocc] = 0
    Imat[neleca:nocc,nocc:] = 0
    Imat[neleca:,:neleca] = Imat[:neleca,neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements in packed lower-triangular storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T, (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    # Halve the diagonal entries of the packed pair index.
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        # NOTE(review): vhf1 is allocated but never read in this function.
        vhf1 = numpy.zeros((3,nao,nao))
        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            # [q0, q1) is the running AO range of the current shell block.
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl', shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape((p1-p0)*nf,-1))
                eri1tmp = eri1tmp.reshape(p1-p0,nf,nao,nao)
                de[k,i] -= numpy.einsum('ijkl,ij,kl', eri1tmp, hf_dm1[p0:p1,q0:q1], zvec_ao) * 2
                de[k,i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1, zvec_ao[p0:p1,q0:q1]) * 2
                de[k,i] += numpy.einsum('ijkl,il,kj', eri1tmp, hf_dm1[p0:p1], zvec_ao[q0:q1])
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, hf_dm1[q0:q1], zvec_ao[p0:p1])

                #:vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1])
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1])
            eri1 = eri1tmp = None

        # Overlap-derivative terms, each contracted with rows and columns.
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], im1[:,p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], zeta[:,p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], vhf_s1occ[:,p0:p1]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de
def general(eri, mo_coeffs, erifile, dataname='eri_mo',
            ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE):
    '''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on disk.

    Args:
        eri : 8-fold reduced eri vector
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs:
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By
            assigning different dataname, an existing integral file can be
            reused.  If the erifile contains the dataname, the new integrals
            data will overwrite the old one.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve
            performance
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals have (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals
        verbose : verbosity level passed to ``logger.new_logger``.

    Returns:
        ``erifile``, as passed in.  When it is a str the file is closed
        before returning.

    Pseudocode / algorithm:
        u = mu
        v = nu
        l = lambda
        o = sigma

        Assume eri's are 8-fold reduced.
        nij/nkl_pair = npair or i*j/k*l if only transforming a subset

        First half transform:
            Initialize half_eri of size (nij_pair,npair)
            For lo = 1 -> npair
                Unpack row lo
                Unpack row lo to matrix E_{uv}^{lo}
                Transform C_ui^+*E*C_nj -> E_{ij}^{lo}
                Ravel or pack E_{ij}^{lo}
                Save E_{ij}^{lo} -> half_eri[:,lo]

        Second half transform:
            Initialize h5d_eri of size (nij_pair,nkl_pair)
            For ij = 1 -> nij_pair
                Load and unpack half_eri[ij,:] -> E_{lo}^{ij}
                Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij}
                Repack E_{kl}^{ij}
                Save E_{kl}^{ij} -> h5d_eri[ij,:]

        Each matrix is indexed by the composite index ij x kl, where ij/kl is
        either npair or ixj/kxl, if only a subset of MOs are being transformed.
        Since entire rows or columns need to be read in, the arrays are chunked
        such that IOBLK_SIZE = row/col x chunking col/row. For example, for the
        first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair,
        then load in IOBLK_SIZE/nkl_pair x npair for the second half transform.

            ------ kl ----->
            |jxl
            |
            ij
            |
            |
            v

        As a first guess, the chunking size is jxl. If the super-rows/cols are
        larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed
        accordingly. The pathological limiting case is where the dimensions
        nao_pair, nij_pair, or nkl_pair are so large that the arrays are
        chunked 1x1, in which case IOBLK_SIZE needs to be increased.
    '''
    log = logger.new_logger(None, verbose)
    log.info('******** ao2mo disk, custom eri ********')

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]
    nao_pair = nao * (nao + 1) // 2

    # ij_red / kl_red are True when the pair index is stored as the full
    # (ravelled) product and False when it is tril-packed.
    if compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]):
        ij_red = False
        nij_pair = nmoi * (nmoi + 1) // 2
    else:
        ij_red = True
        nij_pair = nmoi * nmoj
    if compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3]):
        kl_red = False
        nkl_pair = nmok * (nmok + 1) // 2
    else:
        kl_red = True
        nkl_pair = nmok * nmol

    chunks_half = (max(1, numpy.minimum(int(ioblk_size // (nao_pair * f8_size)), nmoj)),
                   max(1, numpy.minimum(int(ioblk_size // (nij_pair * f8_size)), nmol)))
    # Ideally, the final transformed eris should have a chunk of nmoj x nmol
    # to optimize read operations. However, the row size is chunked so that
    # the write operations during the transform can be done as fast as
    # possible.
    # The row count is clamped to >= 1 (like chunks_half): a zero would be an
    # invalid HDF5 chunk shape and a modulo-by-zero in the loop below.
    chunks_full = (max(1, numpy.minimum(int(ioblk_size // (nkl_pair * f8_size)), nmoj)),
                   nmol)

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            # Explicit read/write mode: the h5py default is no longer 'a'.
            feri = h5py.File(erifile, 'a')
        else:
            feri = h5py.File(erifile, 'w', libver='latest')
    else:
        assert (isinstance(erifile, h5py.Group))
        feri = erifile
    # Overwrite a pre-existing dataset, as documented, for file and group alike.
    if dataname in feri:
        del (feri[dataname])
    h5d_eri = feri.create_dataset(dataname, (nij_pair, nkl_pair), 'f8',
                                  chunks=chunks_full)

    # Temporary swap file that holds the half-transformed integrals.
    feri_swap = lib.H5TmpFile(libver='latest')
    half_eri = feri_swap.create_dataset(dataname, (nij_pair, nao_pair), 'f8',
                                        chunks=chunks_half)

    log.debug('Memory information:')
    log.debug('  IOBLK_SIZE (MB): {}'.format(ioblk_size))
    log.debug('  jxl {}x{}, half eri chunk dim  {}x{}'.format(
        nmoj, nmol, chunks_half[0], chunks_half[1]))
    log.debug('  jxl {}x{}, full eri chunk dim {}x{}'.format(
        nmoj, nmol, chunks_full[0], chunks_full[1]))
    log.debug('  Final disk eri size (MB): {:.3g}, chunked {:.3g}'.format(
        nij_pair * nkl_pair * f8_size, numpy.prod(chunks_full) * f8_size))
    log.debug(
        '  Half transformed eri size (MB): {:.3g}, chunked {:.3g}'.format(
            nij_pair * nao_pair * f8_size, numpy.prod(chunks_half) * f8_size))
    log.debug('  RAM buffer for half transform (MB): {:.3g}'.format(
        nij_pair * chunks_half[1] * f8_size * 2))
    log.debug('  RAM buffer for full transform (MB): {:.3g}'.format(
        f8_size * chunks_full[0] * nkl_pair * 2 +
        chunks_half[0] * nao_pair * f8_size * 2))

    def save1(piece, buf):
        # Write column block `piece` of the half-transformed integrals.
        start = piece * chunks_half[1]
        stop = (piece + 1) * chunks_half[1]
        if stop > nao_pair:
            stop = nao_pair
        half_eri[:, start:stop] = buf[:, :stop - start]
        return

    def load2(piece):
        # Read row block `piece` of the half-transformed integrals.
        start = piece * chunks_half[0]
        stop = (piece + 1) * chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
        if start >= nij_pair:
            start = stop - 1
        return half_eri[start:stop, :]

    def prefetch2(piece):
        # Same as load2 but fills whichever array buf_prefetch currently
        # names (the name is swapped with buf_read in the loop below).
        start = piece * chunks_half[0]
        stop = (piece + 1) * chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
        if start >= nij_pair:
            start = stop - 1
        buf_prefetch[:stop - start, :] = half_eri[start:stop, :]
        return

    def save2(piece, buf):
        # Write row block `piece` of the fully transformed integrals.
        start = piece * chunks_full[0]
        stop = (piece + 1) * chunks_full[0]
        if stop > nij_pair:
            stop = nij_pair
        h5d_eri[start:stop, :] = buf[:stop - start, :]
        return

    # transform \mu\nu -> ij
    # NOTE(review): time.clock() was removed in Python 3.8; kept because the
    # logger timer is fed with this pair -- confirm against the logger.
    cput0 = time.clock(), time.time()
    Cimu = mo_coeffs[0].conj().transpose()
    # Double buffering: one buffer is filled while the other is written.
    buf_write = numpy.empty((nij_pair, chunks_half[1]))
    buf_out = numpy.empty_like(buf_write)
    wpiece = 0
    with lib.call_in_background(save1) as async_write:
        for lo in range(nao_pair):
            if lo % chunks_half[1] == 0 and lo > 0:
                #save1(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece, buf_out)
                wpiece += 1
            buf = lib.unpack_row(eri, lo)
            uv = lib.unpack_tril(buf)
            uv = Cimu.dot(uv).dot(mo_coeffs[1])
            if ij_red:
                ij = numpy.ravel(uv)  # grabs by row
            else:
                ij = lib.pack_tril(uv)
            buf_write[:, lo % chunks_half[1]] = ij
    # final write operation & cleanup
    save1(wpiece, buf_write)
    log.timer('(uv|lo) -> (ij|lo)', *cput0)
    uv = None
    ij = None
    buf = None

    # transform \lambda\sigma -> kl
    cput1 = time.clock(), time.time()
    Cklam = mo_coeffs[2].conj().transpose()
    buf_write = numpy.empty((chunks_full[0], nkl_pair))
    buf_out = numpy.empty_like(buf_write)
    buf_read = numpy.empty((chunks_half[0], nao_pair))
    buf_prefetch = numpy.empty_like(buf_read)
    rpiece = 0
    wpiece = 0
    with lib.call_in_background(save2, prefetch2) as (async_write, prefetch):
        buf_read = load2(rpiece)
        prefetch(rpiece + 1)
        for ij in range(nij_pair):
            if ij % chunks_full[0] == 0 and ij > 0:
                #save2(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece, buf_out)
                wpiece += 1
            if ij % chunks_half[0] == 0 and ij > 0:
                #buf_read = load2(rpiece)
                buf_read, buf_prefetch = buf_prefetch, buf_read
                rpiece += 1
                prefetch(rpiece + 1)
            lo = lib.unpack_tril(buf_read[ij % chunks_half[0], :])
            lo = Cklam.dot(lo).dot(mo_coeffs[3])
            if kl_red:
                kl = numpy.ravel(lo)
            else:
                kl = lib.pack_tril(lo)
            buf_write[ij % chunks_full[0], :] = kl
    save2(wpiece, buf_write)
    log.timer('(ij|lo) -> (ij|kl)', *cput1)

    if isinstance(erifile, str):
        feri.close()
    return erifile
def _gamma2_outcore(mycc, t1, t2, l1, l2, h5fobj, compress_vvvv=False):
    '''Build the CCSD 2-particle density matrix blocks and store them in h5fobj.

    Args:
        mycc : CCSD object; ``stdout``, ``verbose`` and ``max_memory`` are read.
        t1, t2 : amplitudes; ``t1.shape`` defines (nocc, nvir).
        l1, l2 : lambda amplitudes, indexed like t1 / t2.
        h5fobj : HDF5 file/group-like object that receives the datasets
            'dvvvv', 'dovvo', 'doooo', 'dooov', 'doovv', 'dovov', 'dovvv'.
        compress_vvvv : if True, 'dvvvv' is stored symmetrized with shape
            (nvir_pair, nvir_pair) instead of (nvir, nvir, nvir, nvir).

    Returns:
        Tuple (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov) of the
        datasets held by h5fobj; ``dvvov`` is always None.
    '''
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nvir_pair = nvir * (nvir + 1) // 2
    dtype = numpy.result_type(t1, t2, l1, l2).char
    if compress_vvvv:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair, nvir_pair), dtype)
    else:
        dvvvv = h5fobj.create_dataset('dvvvv', (nvir, nvir, nvir, nvir), dtype)
    dovvo = h5fobj.create_dataset('dovvo', (nocc, nvir, nvir, nocc), dtype,
                                  chunks=(nocc, 1, nvir, nocc))
    # Temporary HDF5 file for the two large intermediates reused in passes 2/3.
    fswap = lib.H5TmpFile()

    # NOTE(review): time.clock() was removed in Python 3.8; kept because the
    # logger timer is fed with this pair -- confirm against the logger.
    time1 = time.clock(), time.time()
    pvOOv = lib.einsum('ikca,jkcb->aijb', l2, t2)
    moo = numpy.einsum('dljd->jl', pvOOv) * 2
    mvv = numpy.einsum('blld->db', pvOOv) * 2
    gooov = lib.einsum('kc,cija->jkia', t1, pvOOv)
    fswap['mvOOv'] = pvOOv
    pvOOv = None

    pvoOV = -lib.einsum('ikca,jkbc->aijb', l2, t2)
    theta = t2 * 2 - t2.transpose(0, 1, 3, 2)
    pvoOV += lib.einsum('ikac,jkbc->aijb', l2, theta)
    moo += numpy.einsum('dljd->jl', pvoOV)
    mvv += numpy.einsum('blld->db', pvoOV)
    gooov -= lib.einsum('jc,cika->jkia', t1, pvoOV)
    fswap['mvoOV'] = pvoOV
    pvoOV = None

    mia = (numpy.einsum('kc,ikac->ia', l1, t2) * 2 -
           numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo * .5

    tau = numpy.einsum('ia,jb->ijab', t1, t1)
    tau += t2
    goooo = lib.einsum('ijab,klab->ijkl', tau, l2) * .5
    h5fobj['doooo'] = (goooo.transpose(0, 2, 1, 3) * 2 -
                       goooo.transpose(0, 3, 1, 2)).conj()

    gooov += numpy.einsum('ji,ka->jkia', -.5 * moo, t1)
    gooov += lib.einsum('la,jkil->jkia', 2 * t1, goooo)
    gooov -= lib.einsum('ib,jkba->jkia', l1, tau)
    gooov = gooov.conj()
    gooov -= lib.einsum('jkba,ib->jkia', l2, t1)
    h5fobj['dooov'] = gooov.transpose(0, 2, 1, 3) * 2 - gooov.transpose(
        1, 2, 0, 3)
    # Release the large intermediates (gooov is not needed past this point).
    tau = gooov = None
    time1 = log.timer_debug1('rdm intermediates pass1', *time1)

    goovv = numpy.einsum('ia,jb->ijab', mia.conj(), t1.conj())
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    unit = nocc**2 * nvir * 6
    blksize = min(nocc, nvir,
                  max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit)))
    doovv = h5fobj.create_dataset('doovv', (nocc, nocc, nvir, nvir), dtype,
                                  chunks=(nocc, nocc, 1, nvir))
    log.debug1(
        'rdm intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in lib.prange(0, nvir, blksize):
        tau = numpy.einsum('ia,jb->ijab', t1[:, p0:p1], t1)
        tau += t2[:, :, p0:p1]
        tmpoovv = lib.einsum('ijkl,klab->ijab', goooo, tau)
        tmpoovv -= lib.einsum('jk,ikab->ijab', mij, tau)
        tmpoovv -= lib.einsum('cb,ijac->ijab', mab, t2[:, :, p0:p1])
        tmpoovv -= lib.einsum('bd,ijad->ijab', mvv * .5, tau)
        tmpoovv += .5 * tau
        tmpoovv = tmpoovv.conj()
        tmpoovv += .5 * l2[:, :, p0:p1]
        goovv[:, :, p0:p1] += tmpoovv

        pvOOv = fswap['mvOOv'][p0:p1]
        pvoOV = fswap['mvoOV'][p0:p1]
        gOvvO = lib.einsum('kiac,jc,kb->iabj', l2[:, :, p0:p1], t1, t1)
        gOvvO += numpy.einsum('aijb->iabj', pvOOv)
        govVO = numpy.einsum('ia,jb->iabj', l1[:, p0:p1], t1)
        govVO -= lib.einsum('ikac,jc,kb->iabj', l2[:, :, p0:p1], t1, t1)
        govVO += numpy.einsum('aijb->iabj', pvoOV)
        dovvo[:, p0:p1] = 2 * govVO + gOvvO
        doovv[:, :, p0:p1] = (-2 * gOvvO - govVO).transpose(3, 0, 1, 2).conj()
        gOvvO = govVO = None

        # tau now holds t1*t1 + t2/2 for this block.
        tau -= t2[:, :, p0:p1] * .5
        for q0, q1 in lib.prange(0, nvir, blksize):
            goovv[:, :, q0:q1, :] += lib.einsum(
                'dlib,jlda->ijab', pvOOv, tau[:, :, :, q0:q1]).conj()
            goovv[:, :, :, q0:q1] -= lib.einsum(
                'dlia,jldb->ijab', pvoOV, tau[:, :, :, q0:q1]).conj()
            tmp = pvoOV[:, :, :, q0:q1] + pvOOv[:, :, :, q0:q1] * .5
            goovv[:, :, q0:q1, :] += lib.einsum(
                'dlia,jlbd->ijab', tmp, t2[:, :, :, p0:p1]).conj()
        pvOOv = pvoOV = tau = None
        time1 = log.timer_debug1('rdm intermediates pass2 [%d:%d]' % (p0, p1),
                                 *time1)
    h5fobj['dovov'] = goovv.transpose(0, 2, 1, 3) * 2 - goovv.transpose(
        1, 2, 0, 3)
    goovv = goooo = None

    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    unit = max(nocc**2 * nvir * 2 + nocc * nvir**2 * 3,
               nvir**3 * 2 + nocc * nvir**2 * 2 + nocc**2 * nvir * 2)
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory * .9e6 / 8 / unit)))
    # The second value logged is nvir, matching the message text.
    log.debug1(
        'rdm intermediates pass 3: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    dovvv = h5fobj.create_dataset('dovvv', (nocc, nvir, nvir, nvir), dtype,
                                  chunks=(nocc, min(nocc, nvir), 1, nvir))
    time1 = time.clock(), time.time()
    for p0, p1 in lib.prange(0, nvir, blksize):
        l2tmp = l2[:, :, p0:p1]
        gvvvv = lib.einsum('ijab,ijcd->abcd', l2tmp, t2)
        jabc = lib.einsum('ijab,ic->jabc', l2tmp, t1)
        gvvvv += lib.einsum('jabc,jd->abcd', jabc, t1)
        l2tmp = jabc = None

        if compress_vvvv:
            # symmetrize dvvvv because it does not affect the results of ccsd_grad
            # dvvvv = gvvvv.transpose(0,2,1,3)-gvvvv.transpose(0,3,1,2)*.5
            # dvvvv = (dvvvv+dvvvv.transpose(0,1,3,2)) * .5
            # dvvvv = (dvvvv+dvvvv.transpose(1,0,2,3)) * .5
            # now dvvvv == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
            # NOTE(review): both work buffers use numpy's default float64
            # dtype regardless of `dtype` above.
            tmp = numpy.empty((nvir, nvir, nvir))
            tmpvvvv = numpy.empty((p1 - p0, nvir, nvir_pair))
            for i in range(p1 - p0):
                vvv = gvvvv[i].conj().transpose(1, 0, 2)
                tmp[:] = vvv - vvv.transpose(2, 1, 0) * .5
                lib.pack_tril(tmp + tmp.transpose(0, 2, 1), out=tmpvvvv[i])
            # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
            for i in range(p0, p1):
                for j in range(p0, i):
                    tmpvvvv[i - p0, j] += tmpvvvv[j - p0, i]
                tmpvvvv[i - p0, i] *= 2
            # Rows belonging to later blocks: park the partial contribution
            # in the dataset; it is picked up again when that block is reached.
            for i in range(p1, nvir):
                off = i * (i + 1) // 2
                dvvvv[off + p0:off + p1] = tmpvvvv[:, i]
            for i in range(p0, p1):
                off = i * (i + 1) // 2
                if p0 > 0:
                    # add what earlier blocks parked for this row
                    tmpvvvv[i - p0, :p0] += dvvvv[off:off + p0]
                dvvvv[off:off + i + 1] = tmpvvvv[i - p0, :i + 1] * .25
            tmp = tmpvvvv = None
        else:
            for i in range(p0, p1):
                vvv = gvvvv[i - p0].conj().transpose(1, 0, 2)
                dvvvv[i] = vvv - vvv.transpose(2, 1, 0) * .5

        gvovv = lib.einsum('adbc,id->aibc', gvvvv, -t1)
        gvvvv = None

        gvovv += lib.einsum('akic,kb->aibc', fswap['mvoOV'][p0:p1], t1)
        gvovv -= lib.einsum('akib,kc->aibc', fswap['mvOOv'][p0:p1], t1)

        gvovv += lib.einsum('ja,jibc->aibc', l1[:, p0:p1], t2)
        gvovv += lib.einsum('ja,jb,ic->aibc', l1[:, p0:p1], t1, t1)
        gvovv += numpy.einsum('ba,ic->aibc', mvv[:, p0:p1] * .5, t1)
        gvovv = gvovv.conj()
        gvovv += lib.einsum('ja,jibc->aibc', t1[:, p0:p1], l2)

        dovvv[:, :, p0:p1] = gvovv.transpose(1, 3, 0, 2) * 2 - gvovv.transpose(
            1, 2, 0, 3)
        # Drop the block intermediate before the next iteration allocates.
        gvovv = None
        time1 = log.timer_debug1('rdm intermediates pass3 [%d:%d]' % (p0, p1),
                                 *time1)

    fswap = None
    dvvov = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'], h5fobj['doovv'],
            h5fobj['dovvo'], dvvov, h5fobj['dovvv'], h5fobj['dooov'])
def restore(symmetry, eri, norb, tao=None):
    r'''Convert the 2e integrals (in Chemist's notation) between different
    level of permutation symmetry (8-fold, 4-fold, or no symmetry)

    Args:
        symmetry : int or str
            code to present the target symmetry of 2e integrals

            | 's8' or '8' or 8 : 8-fold symmetry
            | 's4' or '4' or 4 : 4-fold symmetry
            | 's1' or '1' or 1 : no symmetry
            | 's2ij' or '2ij' : symmetric ij pair for (ij|kl) (TODO)
            | 's2kl' or '2kl' : symmetric kl pair for (ij|kl) (TODO)

            Note the 4-fold symmetry requires (ij|kl) == (ji|kl) == (ij|lk)
            while (ij|kl) == (kl|ij) is not required.

        eri : ndarray
            The symmetry of eri is determined by the size of eri and norb
        norb : int
            The symmetry of eri is determined by the size of eri and norb
        tao : not used in this function body.

    Returns:
        ndarray.  The shape depends on the target symmetry.

            | 8 : (norb*(norb+1)/2)*(norb*(norb+1)/2+1)/2
            | 4 : (norb*(norb+1)/2, norb*(norb+1)/2)
            | 1 : (norb, norb, norb, norb)

    Raises:
        ValueError : the symmetry code is not one of 8, 4, 1, 2kl, 2ij.
        RuntimeError : eri is not double precision, or its size matches none
            of the recognized layouts for the given norb.

    Examples:

    >>> from pyscf import gto
    >>> from pyscf.scf import _vhf
    >>> from pyscf import ao2mo
    >>> mol = gto.M(atom='O 0 0 0; H 0 1 0; H 0 0 1', basis='sto3g')
    >>> eri = mol.intor('int2e')
    >>> eri1 = ao2mo.restore(1, eri, mol.nao_nr())
    >>> eri4 = ao2mo.restore(4, eri, mol.nao_nr())
    >>> eri8 = ao2mo.restore(8, eri, mol.nao_nr())
    >>> print(eri1.shape)
    (7, 7, 7, 7)
    >>> print(eri4.shape)
    (28, 28)
    >>> print(eri8.shape)
    (406,)
    '''
    targetsym = _stand_sym_code(symmetry)
    if targetsym not in ('8', '4', '1', '2kl', '2ij'):
        raise ValueError('symmetry = %s' % symmetry)

    # Convert first so array-likes without a .dtype attribute are accepted.
    eri = numpy.asarray(eri, order='C')
    if eri.dtype != numpy.double:
        raise RuntimeError('Complex integrals not supported')

    npair = norb * (norb + 1) // 2
    # The input layout is inferred from the element count (and, for the two
    # 2-fold layouts, from which axis has length npair).
    if eri.size == norb**4:  # s1
        if targetsym == '1':
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2kl':
            eri = lib.pack_tril(eri.reshape(norb**2, norb, norb))
            return eri.reshape(norb, norb, npair)
        elif targetsym == '2ij':
            eri = lib.pack_tril(eri.reshape(norb, norb, norb**2), axis=0)
            return eri.reshape(npair, norb, norb)
        else:
            return _convert('1', targetsym, eri, norb)

    elif eri.size == npair**2:  # s4
        if targetsym == '4':
            return eri.reshape(npair, npair)
        elif targetsym == '8':
            return lib.pack_tril(eri.reshape(npair, npair))
        elif targetsym == '2kl':
            return lib.unpack_tril(eri, lib.SYMMETRIC, axis=0)
        elif targetsym == '2ij':
            return lib.unpack_tril(eri, lib.SYMMETRIC, axis=-1)
        else:
            return _convert('4', targetsym, eri, norb)

    elif eri.size == npair * (npair + 1) // 2:  # 8-fold
        if targetsym == '8':
            return eri.ravel()
        elif targetsym == '4':
            return lib.unpack_tril(eri.ravel(), lib.SYMMETRIC)
        elif targetsym == '2kl':
            return lib.unpack_tril(lib.unpack_tril(eri.ravel()),
                                   lib.SYMMETRIC, axis=0)
        elif targetsym == '2ij':
            return lib.unpack_tril(lib.unpack_tril(eri.ravel()),
                                   lib.SYMMETRIC, axis=-1)
        else:
            return _convert('8', targetsym, eri, norb)

    elif eri.size == npair * norb**2 and eri.shape[0] == npair:  # s2ij
        if targetsym == '2ij':
            return eri.reshape(npair, norb, norb)
        elif targetsym == '8':
            eri = lib.pack_tril(eri.reshape(npair, norb, norb))
            return lib.pack_tril(eri)
        elif targetsym == '4':
            return lib.pack_tril(eri.reshape(npair, norb, norb))
        elif targetsym == '1':
            eri = lib.unpack_tril(eri.reshape(npair, norb**2),
                                  lib.SYMMETRIC, axis=0)
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2kl':
            # Expand the packed ij axis and pack the kl axis in one gather.
            tril2sq = lib.square_mat_in_trilu_indices(norb)
            trilidx = numpy.tril_indices(norb)
            eri = lib.take_2d(eri.reshape(npair, norb**2), tril2sq.ravel(),
                              trilidx[0] * norb + trilidx[1])
            return eri.reshape(norb, norb, npair)

    elif eri.size == npair * norb**2 and eri.shape[-1] == npair:  # s2kl
        if targetsym == '2kl':
            return eri.reshape(norb, norb, npair)
        elif targetsym == '8':
            eri = lib.pack_tril(eri.reshape(norb, norb, npair), axis=0)
            return lib.pack_tril(eri)
        elif targetsym == '4':
            return lib.pack_tril(eri.reshape(norb, norb, npair), axis=0)
        elif targetsym == '1':
            eri = lib.unpack_tril(eri.reshape(norb**2, npair),
                                  lib.SYMMETRIC, axis=-1)
            return eri.reshape(norb, norb, norb, norb)
        elif targetsym == '2ij':
            # Pack the ij axis and expand the packed kl axis in one gather.
            tril2sq = lib.square_mat_in_trilu_indices(norb)
            trilidx = numpy.tril_indices(norb)
            eri = lib.take_2d(eri.reshape(norb**2, npair),
                              trilidx[0] * norb + trilidx[1], tril2sq.ravel())
            return eri.reshape(npair, norb, norb)

    else:
        raise RuntimeError('eri.size = %d, norb = %d' % (eri.size, norb))
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
    '''Update one stored chunk of 3-index integrals for a k-point difference.

    For every k-point pair mapped to ``uniq_kptji_id`` the dataset
    ``'j3c-chunks/<job_id>/<idx>'`` is read from ``fswap`` and fused, the
    products of the Coulomb-weighted auxiliary Fourier transform with the
    AO-pair Fourier transform are accumulated into it, and the result is
    contracted with ``j2c`` and written back to the same dataset.

    Args:
        job_id : int
            First index of the ``'j3c-chunks/%d/%d'`` / ``'j3c-/%d/%d'`` keys.
        uniq_kptji_id : int
            Index into ``uniq_kpts`` and ``j2ctags``; also selects ``'j2c/%d'``.
        sh0, sh1 : int
            Shell range of the chunk (used to index ``ao_loc`` and to build
            ``shls_slice``).

    Returns nothing; all results are written into ``fswap``.  The function is
    a closure and reads ``fswap``, ``cell``, ``fused_cell``, ``mydf``,
    ``fuse``, ``Gv``, ``b``, ``gxyz``, ``Gvbase``, ``mesh``, ``ngrids``,
    ``max_memory``, ``ao_loc``, ``kptjs``, ``uniq_kpts``, ``uniq_inverse``,
    ``j2ctags`` and ``log`` from the enclosing scope.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    pair_ids = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    kptj_group = kptjs[pair_ids]
    n_kptj = len(kptj_group)

    # Coulomb-weighted Fourier transform of the fused auxiliary functions,
    # kept as separate real / imaginary arrays.
    aux_ft = fuse(ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T)
    aux_ft *= mydf.weighted_coulG(kpt, False, mesh)
    kLR = lib.transpose(numpy.asarray(aux_ft.real, order='C'))
    kLI = lib.transpose(numpy.asarray(aux_ft.imag, order='C'))

    j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
    j2ctag = j2ctags[uniq_kptji_id]
    naux0 = j2c.shape[0]
    negative_key = 'j2c-/%d' % uniq_kptji_id
    j2c_negative = None
    if negative_key in fswap:
        j2c_negative = numpy.asarray(fswap[negative_key])

    aosym = 's2' if is_zero(kpt) else 's1'

    # The vbar * overlap term is only removed for kpti == kptj in 3D cells.
    remove_vbar = aosym == 's2' and cell.dimension == 3
    if remove_vbar:
        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = [lib.pack_tril(s) for s in
                cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj_group)]

    i0 = ao_loc[sh0]
    i1 = ao_loc[sh1]
    j3cR = [None] * n_kptj
    j3cI = [None] * n_kptj
    for k, idx in enumerate(pair_ids):
        v = fuse(numpy.asarray(fswap['j3c-chunks/%d/%d' % (job_id, idx)]))
        if remove_vbar:
            for i in numpy.where(vbar != 0)[0]:
                v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
        j3cR[k] = numpy.asarray(v.real, order='C')
        # The imaginary part is only materialised for complex chunks.
        if v.dtype == numpy.complex128:
            j3cI[k] = numpy.asarray(v.imag, order='C')
        v = None

    ncol = j3cR[0].shape[1]
    # +1 for pqkRbuf/pqkIbuf
    Gblksize = max(16, int(max_memory*1e6/16/ncol/(n_kptj+1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    pqkRbuf = numpy.empty(ncol*Gblksize)
    pqkIbuf = numpy.empty(ncol*Gblksize)
    buf = numpy.empty(n_kptj*ncol*Gblksize, dtype=numpy.complex128)
    log.alldebug2(' blksize (%d,%d)', Gblksize, ncol)

    shls_slice = (sh0, sh1, 0, sh1 if aosym == 's2' else cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    kptj_group, out=buf)
        nG = p1 - p0
        kLR_blk = kLR[p0:p1].T
        kLI_blk = kLI[p0:p1].T
        for k in range(n_kptj):
            aoao = dat[k].reshape(nG, ncol)
            # Views on the preallocated scratch buffers.
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            lib.dot(kLR_blk, pqkR.T, -1, j3cR[k], 1)
            lib.dot(kLI_blk, pqkI.T, -1, j3cR[k], 1)
            if not (is_zero(kpt) and gamma_point(kptj_group[k])):
                lib.dot(kLR_blk, pqkI.T, -1, j3cI[k], 1)
                lib.dot(kLI_blk, pqkR.T, 1, j3cI[k], 1)

    for k, idx in enumerate(pair_ids):
        chunk_key = 'j3c-chunks/%d/%d' % (job_id, idx)
        if is_zero(kpt) and gamma_point(kptj_group[k]):
            v = j3cR[k]
        else:
            v = j3cR[k] + j3cI[k] * 1j

        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            fswap[chunk_key][:naux0] = v
        else:
            # v itself is left untouched here; only the product is stored.
            fswap[chunk_key][:naux0] = lib.dot(j2c, v)

        # low-dimension systems
        if j2c_negative is not None:
            fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    # Process all k-point pairs that share the k-point difference indexed by
    # ``uniq_kptji_id``: for each AO shell range, load the 'j3c/<idx>' columns
    # from ``feri``, apply the Lpq and Fourier-space updates, and store the
    # columns back through ``save``.
    #
    # This is a closure: uniq_kpts, uniq_inverse, kptjs, kLRs, kLIs, j2c, feri,
    # mydf, cell, fused_cell, fuse, save, nao, naux, ngs, Gv, gxyz, gs, invh,
    # Lpq_fake, zdotCN and log all come from the enclosing scope.
    # Nothing is returned.
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # Indices of the (kpti, kptj) pairs whose difference is this kpt.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
    # Precomputed real / imaginary arrays for this k-point difference.
    kLR = kLRs[uniq_kptji_id]
    kLI = kLIs[uniq_kptji_id]

    if is_zero(kpt):  # kpti == kptj
        # Packed lower-triangular AO pairs are used in this case.
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        # Pack every overlap matrix so it can be sliced with the same
        # col0:col1 column range as the j3c data below.
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    # Never plan with less than 2000 (same unit as mydf.max_memory).
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(
        max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
        nao_pair)
    shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
    # Largest number of columns in any shell range; sizes the work buffers.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(
            16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(
            16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    # Zero-initialised; the used part is reset to zero after every read
    # (see ``aoao[:] = 0`` below).
    # NOTE(review): presumably _ft_aopair_kpts accumulates into ``out`` -- confirm.
    buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        # Running column window [col0, col1) of this shell range.
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))

            # Pick the Lpq block according to mydf.approx_sr_level and aosym.
            if mydf.approx_sr_level == 0:
                Lpq = numpy.asarray(feri['Lpq/%d' % idx][:, col0:col1])
            elif aosym == 's2':
                Lpq = numpy.asarray(feri['Lpq/0'][:, col0:col1])
            else:
                Lpq = numpy.asarray(Lpq_fake[:, col0:col1])
            # NOTE(review): lib.dot(a, b, alpha, c, beta) is presumably the
            # in-place update c = alpha*a.b + beta*c -- confirm against lib.dot.
            lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1)
            if is_zero(kpt):
                # Subtract vbar[i] * overlap from every row with non-zero vbar.
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]

            j3cR.append(numpy.asarray(v.real, order='C'))
            # No imaginary part is kept when kpt is zero and kptj is Gamma.
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = Lpq = None  # drop references before the next iteration

        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    # Fortran-ordered (nG, ncol) view on the k-th row of buf.
                    aoao = numpy.ndarray((nG, ncol),
                                         dtype=numpy.complex128,
                                         order='F',
                                         buffer=buf[k])
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    # Clear the part of buf that was just consumed.
                    aoao[:] = 0
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)
            # Number of first-index AOs in this range (ncol == ni * nao).
            ni = ncol // nao
            for p0, p1 in lib.prange(0, ngs, Gblksize):
                ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                      invh, gxyz[p0:p1], gs, kpt,
                                      adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = numpy.ndarray((nG, ni, nao),
                                         dtype=numpy.complex128,
                                         order='F',
                                         buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                    # Move the grid axis last before flattening the AO pair.
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    # Clear the part of buf that was just consumed.
                    aoao[:] = 0
                    pqkR = pqkR.reshape(-1, nG)
                    pqkI = pqkI.reshape(-1, nG)
                    # NOTE(review): zdotCN presumably performs the complex
                    # product on split real/imag parts, updating j3cR[k] and
                    # j3cI[k] in place -- confirm against its definition.
                    zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1,
                           j3cR[k], j3cI[k], 1)

        # Write the updated columns of this shell range back.
        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                save('j3c/%d' % ji, j3cR[k], col0, col1)
            else:
                save('j3c/%d' % ji, j3cR[k] + j3cI[k] * 1j, col0, col1)
def general(eri, mo_coeffs, erifile, dataname='eri_mo',
            ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE):
    '''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on disk.

    Args:
        eri : 8-fold reduced eri vector
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.

    Kwargs:
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By
            assigning different dataname, an existing integral file can be
            reused.  If ``erifile`` is a file name and that file already
            contains ``dataname``, the old dataset is deleted and replaced.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve
            performance
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals have (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation
            symmetry, and return the "plain" MO integrals

    Returns:
        ``erifile`` exactly as it was passed in.  When a file name was given,
        the file is closed before returning.

    Pseudocode / algorithm:
        u = mu
        v = nu
        l = lambda
        o = sigma

        Assume eri's are 8-fold reduced.
        nij/nkl_pair = npair or i*j/k*l if only transforming a subset

        First half transform:
            Initialize half_eri of size (nij_pair,npair)
                For lo = 1 -> npair
                    Unpack row lo
                    Unpack row lo to matrix E_{uv}^{lo}
                    Transform C_ui^+*E*C_vj -> E_{ij}^{lo}
                    Ravel or pack E_{ij}^{lo}
                    Save E_{ij}^{lo} -> half_eri[:,lo]

        Second half transform:
            Initialize h5d_eri of size (nij_pair,nkl_pair)
                For ij = 1 -> nij_pair
                    Load and unpack half_eri[ij,:] -> E_{lo}^{ij}
                    Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij}
                    Repack E_{kl}^{ij}
                    Save E_{kl}^{ij} -> h5d_eri[ij,:]

        Each matrix is indexed by the composite index ij x kl, where ij/kl is
        either npair or ixj/kxl, if only a subset of MOs are being transformed.
        Since entire rows or columns need to be read in, the arrays are chunked
        such that IOBLK_SIZE = row/col x chunking col/row. For example, for the
        first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair,
        then load in IOBLK_SIZE/nkl_pair x npair for the second half transform.

            ------ kl ----->
            |jxl
            |
         ij |
            |
            v

        As a first guess, the chunking size is jxl. If the super-rows/cols are
        larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed
        accordingly. The pathological limiting case is where the dimensions
        nao_pair, nij_pair, or nkl_pair are so large that the arrays are
        chunked 1x1, in which case IOBLK_SIZE needs to be increased.
    '''
    log = logger.new_logger(None, verbose)
    log.info('******** ao2mo disk, custom eri ********')

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]
    nao_pair = nao*(nao+1) // 2

    # NOTE: despite the names, ij_red / kl_red == True selects the UNpacked
    # (ravelled i*j / k*l) layout; False selects the packed triangular layout.
    if compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]):
        ij_red = False
        nij_pair = nmoi*(nmoi+1) // 2
    else:
        ij_red = True
        nij_pair = nmoi*nmoj
    if compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3]):
        kl_red = False
        nkl_pair = nmok*(nmok+1) // 2
    else:
        kl_red = True
        nkl_pair = nmok*nmol

    chunks_half = (max(1, numpy.minimum(int(ioblk_size//(nao_pair*f8_size)),nmoj)),
                   max(1, numpy.minimum(int(ioblk_size//(nij_pair*f8_size)),nmol)))
    # Ideally, the final transformed eris should have a chunk of nmoj x nmol
    # to optimize read operations. However, the row size is chunked so that
    # the write operations during the transform can be done as fast as
    # possible.
    # The row count is clamped to >= 1 (as for chunks_half): a zero chunk
    # dimension is not a valid HDF5 chunk shape and would also break the
    # ``ij % chunks_full[0]`` bookkeeping below.
    chunks_full = (max(1, numpy.minimum(int(ioblk_size//(nkl_pair*f8_size)),nmoj)),
                   nmol)

    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (identical behaviour to before) and fall back to process_time.
    cpu_clock = getattr(time, 'clock', None) or time.process_time

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            # Open read/write explicitly: h5py >= 3 defaults to read-only,
            # which would make the delete/create below fail.
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del(feri[dataname])
        else:
            feri = h5py.File(erifile,'w',libver='latest')
    else:
        assert(isinstance(erifile, h5py.Group))
        feri = erifile
    h5d_eri = feri.create_dataset(dataname,(nij_pair,nkl_pair),'f8',chunks=chunks_full)

    # Temporary file holding the half-transformed integrals (ij|lo).
    feri_swap = lib.H5TmpFile(libver='latest')
    half_eri = feri_swap.create_dataset(dataname,(nij_pair,nao_pair),'f8',chunks=chunks_half)

    log.debug('Memory information:')
    log.debug(' IOBLK_SIZE (MB): {}'.format(ioblk_size))
    log.debug(' jxl {}x{}, half eri chunk dim {}x{}'.format(nmoj,nmol,chunks_half[0],chunks_half[1]))
    log.debug(' jxl {}x{}, full eri chunk dim {}x{}'.format(nmoj,nmol,chunks_full[0],chunks_full[1]))
    log.debug(' Final disk eri size (MB): {:.3g}, chunked {:.3g}'
              .format(nij_pair*nkl_pair*f8_size,numpy.prod(chunks_full)*f8_size))
    log.debug(' Half transformed eri size (MB): {:.3g}, chunked {:.3g}'
              .format(nij_pair*nao_pair*f8_size,numpy.prod(chunks_half)*f8_size))
    log.debug(' RAM buffer for half transform (MB): {:.3g}'
              .format(nij_pair*chunks_half[1]*f8_size*2))
    log.debug(' RAM buffer for full transform (MB): {:.3g}'
              .format(f8_size*chunks_full[0]*nkl_pair*2 + chunks_half[0]*nao_pair*f8_size*2))

    def save1(piece,buf):
        # Write column block ``piece`` of the half-transformed integrals.
        start = piece*chunks_half[1]
        stop = (piece+1)*chunks_half[1]
        if stop > nao_pair:
            stop = nao_pair
        half_eri[:,start:stop] = buf[:,:stop-start]
        return

    def load2(piece):
        # Read row block ``piece`` of the half-transformed integrals.
        start = piece*chunks_half[0]
        stop = (piece+1)*chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
        if start >= nij_pair:
            # Past the end: fall back to the last row so the slice is valid.
            start = stop - 1
        return half_eri[start:stop,:]

    def prefetch2(piece):
        # Same as load2, but fills the preallocated prefetch buffer.
        start = piece*chunks_half[0]
        stop = (piece+1)*chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
        if start >= nij_pair:
            start = stop - 1
        buf_prefetch[:stop-start,:] = half_eri[start:stop,:]
        return

    def save2(piece,buf):
        # Write row block ``piece`` of the fully transformed integrals.
        start = piece*chunks_full[0]
        stop = (piece+1)*chunks_full[0]
        if stop > nij_pair:
            stop = nij_pair
        h5d_eri[start:stop,:] = buf[:stop-start,:]
        return

    # transform \mu\nu -> ij
    cput0 = cpu_clock(), time.time()
    Cimu = mo_coeffs[0].conj().transpose()
    # Two buffers are swapped so one can be written in the background while
    # the other is being filled.
    buf_write = numpy.empty((nij_pair,chunks_half[1]))
    buf_out = numpy.empty_like(buf_write)
    wpiece = 0
    with lib.call_in_background(save1) as async_write:
        for lo in range(nao_pair):
            if lo % chunks_half[1] == 0 and lo > 0:
                #save1(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece,buf_out)
                wpiece += 1
            buf = lib.unpack_row(eri,lo)
            uv = lib.unpack_tril(buf)
            uv = Cimu.dot(uv).dot(mo_coeffs[1])
            if ij_red:
                ij = numpy.ravel(uv) # grabs by row
            else:
                ij = lib.pack_tril(uv)
            buf_write[:,lo % chunks_half[1]] = ij
    # final write operation & cleanup
    save1(wpiece,buf_write)
    log.timer('(uv|lo) -> (ij|lo)', *cput0)
    uv = None
    ij = None
    buf = None

    # transform \lambda\sigma -> kl
    cput1 = cpu_clock(), time.time()
    Cklam = mo_coeffs[2].conj().transpose()
    buf_write = numpy.empty((chunks_full[0],nkl_pair))
    buf_out = numpy.empty_like(buf_write)
    buf_read = numpy.empty((chunks_half[0],nao_pair))
    buf_prefetch = numpy.empty_like(buf_read)
    rpiece = 0
    wpiece = 0
    with lib.call_in_background(save2,prefetch2) as (async_write,prefetch):
        buf_read = load2(rpiece)
        prefetch(rpiece+1)
        for ij in range(nij_pair):
            if ij % chunks_full[0] == 0 and ij > 0:
                #save2(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece,buf_out)
                wpiece += 1
            if ij % chunks_half[0] == 0 and ij > 0:
                #buf_read = load2(rpiece)
                buf_read, buf_prefetch = buf_prefetch, buf_read
                rpiece += 1
                prefetch(rpiece+1)
            lo = lib.unpack_tril(buf_read[ij % chunks_half[0],:])
            lo = Cklam.dot(lo).dot(mo_coeffs[3])
            if kl_red:
                kl = numpy.ravel(lo)
            else:
                kl = lib.pack_tril(lo)
            buf_write[ij % chunks_full[0],:] = kl
    save2(wpiece,buf_write)
    log.timer('(ij|lo) -> (ij|kl)', *cput1)

    # Only close handles this function opened itself.
    if isinstance(erifile, str):
        feri.close()
    return erifile
def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
    '''Build the final 'j3c/<ji>' datasets for one unique k-point difference.

    For every k-point pair mapped to ``uniq_kptji_id`` the columns of
    ``feri['j3c/<ji>']`` are loaded shell range by shell range, updated with
    the Fourier-space products of the auxiliary and AO-pair transforms,
    contracted with the eigen-decomposed metric ``j2c`` and written back.
    Finally ``'j2c/<id>'`` is deleted and every ``'j3c/<ji>'`` dataset is
    replaced by its first ``naux0`` rows.

    Args:
        uniq_kptji_id : int
            Index into ``uniq_kpts``; also selects ``feri['j2c/%d']``.

    Returns nothing.  The function is a closure over ``feri``, ``cell``,
    ``fused_cell``, ``mydf``, ``fuse``, ``Gv``, ``b``, ``gxyz``, ``Gvbase``,
    ``gs``, ``ngs``, ``nao``, ``naux``, ``kptjs``, ``uniq_kpts``,
    ``uniq_inverse`` and ``log``.
    '''
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    # Coulomb-weighted Fourier transform of the fused auxiliary functions.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, gs)
    kLR = Gaux.T.real.copy('C')
    kLI = Gaux.T.imag.copy('C')
    j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
    # Note large difference may be found in results between the CD/eig treatments.
    # In some systems, small integral errors can lead to different treatments of
    # linear dependency which can be observed in the total energy/orbital energy
    # around 4th decimal place.
    # The Cholesky (CD) treatment is abandoned here for better numerical
    # stability; the eigen-decomposition is always used, so ``j2ctag`` is
    # always 'eig' and the 'CD' branch further down is currently unreachable.
    w, v = scipy.linalg.eigh(j2c)
    log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
              uniq_kptji_id, w[0] / w[-1],
              numpy.count_nonzero(w < df.LINEAR_DEP_THR))
    # Keep eigenvectors above the linear-dependency threshold and scale each
    # by 1/sqrt(eigenvalue).
    v = v[:, w > df.LINEAR_DEP_THR].T.conj()
    v /= numpy.sqrt(w[w > df.LINEAR_DEP_THR]).reshape(-1, 1)
    j2c = v
    j2ctag = 'eig'
    naux0 = j2c.shape[0]

    if is_zero(kpt):  # kpti == kptj
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
        for k, ji in enumerate(adapted_ji_idx):
            ovlp[k] = lib.pack_tril(ovlp[k])
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(
        max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
        nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(
            16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(
            16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngs, 16384)
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)

    col1 = 0
    for istep, sh_range in enumerate(shranges):
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                   istep+1, len(shranges), *sh_range)
        bstart, bend, ncol = sh_range
        col0, col1 = col1, col1 + ncol
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            v = fuse(numpy.asarray(feri['j3c/%d' % idx][:, col0:col1]))
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

        # For 's1' the shell ranges count every AO as second index
        # (nao_pair = nao**2), so the second shell range must span the whole
        # cell; restricting it to ``bend`` yields fewer than ``ncol`` columns
        # and breaks the reshape below.  Only the packed 's2' layout stops at
        # ``bend``.
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                # Views on the preallocated scratch buffers.
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c/%d' % ji][:naux0, col0:col1] = v

    del (feri['j2c/%d' % uniq_kptji_id])
    # Replace every j3c dataset by its first naux0 rows (the rows written
    # above); the remaining rows are dropped.
    for k, ji in enumerate(adapted_ji_idx):
        v = feri['j3c/%d' % ji][:naux0]
        del (feri['j3c/%d' % ji])
        feri['j3c/%d' % ji] = v
def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
    '''Update one stored chunk of 3-index integrals for a k-point difference.

    Each dataset ``'j3c-chunks/<job_id>/<idx>'`` belonging to
    ``uniq_kptji_id`` is read from ``fswap``; for every block of grid points
    the Coulomb-weighted auxiliary Fourier transform is rebuilt and its
    products with the AO-pair Fourier transform are accumulated into the rows
    from ``naux`` onwards.  The result is then fused, contracted with ``j2c``
    and written back to the same dataset.

    Args:
        job_id : int
            First index of the ``'j3c-chunks/%d/%d'`` / ``'j3c-/%d/%d'`` keys.
        uniq_kptji_id : int
            Index into ``uniq_kpts`` and ``j2ctags``; also selects ``'j2c/%d'``.
        sh0, sh1 : int
            Shell range of the chunk (used to index ``ao_loc`` and to build
            ``shls_slice``).

    Returns nothing; results go to ``fswap``.  The function is a closure over
    ``fswap``, ``cell``, ``auxcell``, ``fused_cell``, ``mydf``, ``fuse``,
    ``Gv``, ``b``, ``gxyz``, ``Gvbase``, ``mesh``, ``ngrids``, ``naux``,
    ``max_memory``, ``ao_loc``, ``kptjs``, ``uniq_kpts``, ``uniq_inverse``,
    ``j2ctags`` and ``log``.
    '''
    kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
    pair_ids = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    kptj_group = kptjs[pair_ids]
    n_kptj = len(kptj_group)

    j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
    j2ctag = j2ctags[uniq_kptji_id]
    naux0 = j2c.shape[0]
    negative_key = 'j2c-/%d' % uniq_kptji_id
    j2c_negative = None
    if negative_key in fswap:
        j2c_negative = numpy.asarray(fswap[negative_key])

    aosym = 's2' if is_zero(kpt) else 's1'

    # The vbar * overlap term is only removed for kpti == kptj in 3D cells.
    remove_vbar = aosym == 's2' and cell.dimension == 3
    if remove_vbar:
        vbar = fuse(mydf.auxbar(fused_cell))
        ovlp = [lib.pack_tril(s) for s in
                cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kptj_group)]

    i0 = ao_loc[sh0]
    i1 = ao_loc[sh1]
    j3cR = [None] * n_kptj
    j3cI = [None] * n_kptj
    for k, idx in enumerate(pair_ids):
        # Unlike the final step below, the raw chunk is not fused here.
        v = numpy.asarray(fswap['j3c-chunks/%d/%d' % (job_id, idx)])
        if remove_vbar:
            for i in numpy.where(vbar != 0)[0]:
                v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel()
        j3cR[k] = numpy.asarray(v.real, order='C')
        # The imaginary part is only materialised for complex chunks.
        if v.dtype == numpy.complex128:
            j3cI[k] = numpy.asarray(v.imag, order='C')
        v = None

    ncol = j3cR[0].shape[1]
    # +1 for pqkRbuf/pqkIbuf
    Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (n_kptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)
    pqkRbuf = numpy.empty(ncol * Gblksize)
    pqkIbuf = numpy.empty(ncol * Gblksize)
    buf = numpy.empty(n_kptj * ncol * Gblksize, dtype=numpy.complex128)
    log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol)

    wcoulG = mydf.weighted_coulG(kpt, False, mesh)
    # Only the shells of fused_cell that follow the auxcell shells are
    # Fourier transformed.
    fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
    shls_slice = (sh0, sh1, 0, sh1 if aosym == 's2' else cell.nbas)

    for p0, p1 in lib.prange(0, ngrids, Gblksize):
        aux_ft = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                             gxyz[p0:p1], Gvbase, kpt)
        aux_ft *= wcoulG[p0:p1, None]
        kLR = aux_ft.real.copy('C')
        kLI = aux_ft.imag.copy('C')
        aux_ft = None  # release the complex block before the next allocation

        dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                    b, gxyz[p0:p1], Gvbase, kpt,
                                    kptj_group, out=buf)
        nG = p1 - p0
        for k in range(n_kptj):
            aoao = dat[k].reshape(nG, ncol)
            # Views on the preallocated scratch buffers.
            pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
            pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
            pqkR[:] = aoao.real.T
            pqkI[:] = aoao.imag.T

            # Only the rows from ``naux`` onwards receive the correction.
            lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
            lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
            if not (is_zero(kpt) and gamma_point(kptj_group[k])):
                lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
        kLR = kLI = None

    for k, idx in enumerate(pair_ids):
        chunk_key = 'j3c-chunks/%d/%d' % (job_id, idx)
        if is_zero(kpt) and gamma_point(kptj_group[k]):
            v = fuse(j3cR[k])
        else:
            v = fuse(j3cR[k] + j3cI[k] * 1j)

        if j2ctag == 'CD':
            v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
            fswap[chunk_key][:naux0] = v
        else:
            # v itself is left untouched here; only the product is stored.
            fswap[chunk_key][:naux0] = lib.dot(j2c, v)

        # low-dimension systems
        if j2c_negative is not None:
            fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Nuclear gradients for the CASSCF-like object ``mc``.

    The first part contracts derivative integrals with the core/active
    density matrices.  The second part builds full MO-basis integrals,
    solves a linear system with the matrix from ``hess()`` for the first
    order orbital coefficients ``c1`` and adds the resulting response
    contribution for every atom and Cartesian direction.

    Args:
        mc : object providing ``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``get_hcore``, ``max_memory``, ``ci`` and ``_scf``.

    Kwargs:
        mo_coeff : ndarray
            Orbital coefficients; defaults to ``mc._scf.mo_coeff``.
        ci : CI vector passed to ``mc.fcisolver.make_rdm12``; defaults to
            ``mc.ci``.
        atmlst : currently ignored -- the gradient is always evaluated for
            all atoms, because the response loop below runs over every atom.
        mf_grad : gradient object of the underlying SCF; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose : unused.

    Returns:
        ndarray of shape (natm, 3), including the nuclear repulsion gradient
        from ``rhf_grad.grad_nuc``.
    '''
    if mo_coeff is None:
        mo_coeff = mc._scf.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # Use the (possibly caller-supplied) CI vector, not unconditionally mc.ci.
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(numpy.dot,
                                  (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    # dme0 is only needed by the (disabled) overlap-derivative term below.
    dme0 = reduce(numpy.dot, (mo_occ, (gfock + gfock.T) * .5, mo_occ.T))
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))

    # Positions of the diagonal elements in a packed lower triangle.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    #casdm2 = casdm2_cc = None

    # NOTE: the atmlst argument is overwritten on purpose -- the response
    # loop further down indexes ``de`` by the position of every atom.
    atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        #de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:, p0:p1], dm_core[p0:p1]) * 2

    # Two-particle density matrix in the full MO basis.
    dm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    for i in range(ncore):
        for j in range(ncore):
            dm2[i, i, j, j] += 4
            dm2[i, j, j, i] -= 2
        dm2[i, i, ncore:nocc, ncore:nocc] = casdm1 * 2
        dm2[ncore:nocc, ncore:nocc, i, i] = casdm1 * 2
        dm2[i, ncore:nocc, ncore:nocc, i] = -casdm1
        dm2[ncore:nocc, i, i, ncore:nocc] = -casdm1
    dm2[ncore:nocc, ncore:nocc, ncore:nocc, ncore:nocc] = casdm2

    # One-particle density matrix in the full MO basis.
    dm1 = numpy.zeros((nmo, nmo))
    for i in range(ncore):
        dm1[i, i] = 2
    dm1[ncore:nocc, ncore:nocc] = casdm1

    neleca, nelecb = mol.nelec

    h1 = -(mol.intor('int1e_ipkin', comp=3) + mol.intor('int1e_ipnuc', comp=3))
    s1 = -mol.intor('int1e_ipovlp', comp=3)
    eri1 = mol.intor('int2e_ip1', comp=3).reshape(3, nao, nao, nao, nao)
    # Transform the last three AO indices; the first (differentiated) index
    # is transformed per atom inside the loop below.
    eri1 = numpy.einsum('xipkl,pj->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijpl,pk->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijkp,pl->xijkl', eri1, mo_coeff)
    h0 = reduce(numpy.dot, (mo_coeff.T, mc._scf.get_hcore(), mo_coeff))
    g0 = ao2mo.restore(1, ao2mo.full(mol, mo_coeff), nmo)

    def hess():
        # Matrix of the linear system solved for the virtual-occupied block
        # of c1, built from orbital-energy differences and MO integrals.
        nocc = mol.nelectron // 2
        nvir = nmo - nocc
        eri_mo = g0
        eai = lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
        h = eri_mo[nocc:, :nocc, nocc:, :nocc] * 4
        h -= numpy.einsum('cdlk->ckdl', eri_mo[nocc:, nocc:, :nocc, :nocc])
        h -= numpy.einsum('cldk->ckdl', eri_mo[nocc:, :nocc, nocc:, :nocc])
        for a in range(nvir):
            for i in range(nocc):
                h[a, i, a, i] += eai[a, i]
        return -h.reshape(nocc * nvir, -1)

    hh = hess()
    ee = mo_energy[:, None] - mo_energy

    for k, (sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()):
        mol.set_rinv_origin(mol.atom_coord(k))
        vrinv = -mol.atom_charge(k) * mol.intor('int1e_iprinv', comp=3)

        # 2e AO integrals dot 2pdm
        for i in range(3):
            g1 = numpy.einsum('pjkl,pi->ijkl', eri1[i, p0:p1], mo_coeff[p0:p1])
            g1 = g1 + g1.transpose(1, 0, 2, 3)
            g1 = g1 + g1.transpose(2, 3, 0, 1)
            g1 *= -1
            hx = (numpy.einsum('pq,pi,qj->ij', h1[i, p0:p1], mo_coeff[p0:p1],
                               mo_coeff) +
                  reduce(numpy.dot, (mo_coeff.T, vrinv[i], mo_coeff)))
            hx = hx + hx.T
            sx = numpy.einsum('pq,pi,qj->ij', s1[i, p0:p1], mo_coeff[p0:p1],
                              mo_coeff)
            sx = sx + sx.T

            fij = (hx[:neleca, :neleca]
                   - numpy.einsum('ij,j->ij', sx[:neleca, :neleca],
                                  mo_energy[:neleca])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[:neleca, :neleca, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[:neleca, :neleca, :neleca, :neleca])
                   + numpy.einsum('ijkk->ij',
                                  g1[:neleca, :neleca, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[:neleca, :neleca, :neleca, :neleca]))

            fab = (hx[neleca:, neleca:]
                   - numpy.einsum('ij,j->ij', sx[neleca:, neleca:],
                                  mo_energy[neleca:])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[neleca:, neleca:, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, neleca:])
                   + numpy.einsum('ijkk->ij',
                                  g1[neleca:, neleca:, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[neleca:, :neleca, :neleca, neleca:]))

            fai = (hx[neleca:, :neleca]
                   - numpy.einsum('ai,i->ai', sx[neleca:, :neleca],
                                  mo_energy[:neleca])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, :neleca])
                   + numpy.einsum('ijkk->ij',
                                  g1[neleca:, :neleca, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[neleca:, :neleca, :neleca, :neleca]))

            # First-order orbital coefficients.
            c1 = numpy.zeros((nmo, nmo))
            c1[:neleca, :neleca] = -.5 * sx[:neleca, :neleca]
            c1[neleca:, neleca:] = -.5 * sx[neleca:, neleca:]
            cvo1 = numpy.linalg.solve(hh, fai.ravel()).reshape(-1, neleca)
            cov1 = -(sx[neleca:, :neleca] + cvo1).T
            c1[neleca:, :neleca] = cvo1
            c1[:neleca, neleca:] = cov1
            v1 = numpy.einsum('pqai,ai->pq', g0[:, :, neleca:, :neleca], cvo1) * 4
            v1 -= numpy.einsum('paiq,ai->pq', g0[:, neleca:, :neleca, :], cvo1)
            v1 -= numpy.einsum('piaq,ai->pq', g0[:, :neleca, neleca:, :], cvo1)
            fij += v1[:neleca, :neleca]
            fab += v1[neleca:, neleca:]
            # Rotations between the core block and the rest of the first
            # ``neleca`` orbitals.
            c1[:ncore, ncore:neleca] = -fij[:ncore, ncore:] / ee[:ncore, ncore:neleca]
            c1[ncore:neleca, :ncore] = -fij[ncore:, :ncore] / ee[ncore:neleca, :ncore]
            # fab is indexed relative to orbital ``neleca``; its first m
            # rows/columns are the orbitals in [neleca, nocc).
            m = nocc - neleca
            c1[nocc:, neleca:nocc] = -fab[m:, :m] / ee[nocc:, neleca:nocc]
            c1[neleca:nocc, nocc:] = -fab[:m, m:] / ee[neleca:nocc, nocc:]

            h0c1 = h0.dot(c1)
            h0c1 = h0c1 + h0c1.T
            g0c1 = numpy.einsum('pjkl,pi->ijkl', g0, c1)
            g0c1 = g0c1 + g0c1.transpose(1, 0, 2, 3)
            g0c1 = g0c1 + g0c1.transpose(2, 3, 0, 1)

            de[k, i] += numpy.einsum('ij,ji', h0c1, dm1)
            de[k, i] += numpy.einsum('ijkl,jilk', g0c1, dm2) * .5

    de += rhf_grad.grad_nuc(mol)
    return de
def make_phi(pcmobj, dm, r_vdw, ui, ylm_1sph, with_nuc=True):
    '''
    Induced potential of ddCOSMO model

    Args:
        pcmobj : object providing ``mol``, ``lebedev_order`` and ``max_memory``
        dm : a single (nao, nao) density matrix or a stack of them
        r_vdw : per-atom radii used to scale the one-sphere grid
        ui : per-atom grid weights; points with ``ui > 0`` are the ones the
            electronic potential is evaluated on
        ylm_1sph : values contracted with the grid weights in the final einsum

    Kwargs:
        with_nuc (bool): When False, the contribution of nuclear charges is
            left out (used when computing the second order derivatives of
            energy)

    Returns:
        ``phi`` from the final contraction; the leading density-matrix axis
        is removed when a single 2D ``dm`` was given.
    '''
    mol = pcmobj.mol
    natm = mol.natm
    sph_coords, sph_weights = make_grids_one_sphere(pcmobj.lebedev_order)
    n_sph = sph_coords.shape[0]

    dms = numpy.asarray(dm)
    single_dm = dms.ndim == 2
    nao = dms.shape[-1]
    dms = dms.reshape(-1, nao, nao)
    n_dm = dms.shape[0]

    # Pack dm + dm^T into lower-triangular form and halve the diagonal
    # entries, which the symmetrisation doubled.
    ao_ids = numpy.arange(nao)
    tril_diag = ao_ids * (ao_ids + 1) // 2 + ao_ids
    tril_dm = lib.pack_tril(dms + dms.transpose(0, 2, 1))
    tril_dm[:, tril_diag] *= .5

    atom_coords = mol.atom_coords()
    atom_charges = mol.atom_charges()

    outside = ui > 0
    # Grid of every atomic sphere: atom centre + radius * unit-sphere point.
    cav_coords = (atom_coords.reshape(natm, 1, 3) +
                  numpy.einsum('r,gx->rgx', r_vdw, sph_coords))

    v_phi = numpy.zeros((n_dm, natm, n_sph))

    if with_nuc:
        # Note (-) sign is not applied to atom_charges, because (-) is
        # explicitly included in rhs and L matrix
        for ia in range(natm):
            d_rs = atom_coords.reshape(-1, 1, 3) - cav_coords[ia]
            inv_dist = 1. / lib.norm(d_rs, axis=2)
            v_phi[:, ia] = numpy.einsum('z,zp->p', atom_charges, inv_dist)

    max_memory = pcmobj.max_memory - lib.current_memory()[0]
    # Never process fewer than 400 points at a time.
    blksize = int(max(max_memory * .9e6 / 8 / nao**2, 400))

    cav_coords = cav_coords[outside]
    n_ext = cav_coords.shape[0]
    v_phi_e = numpy.empty((n_dm, n_ext))
    int3c2e = mol._add_suffix('int3c2e')
    cintopt = gto.moleintor.make_cintopt(mol._atm, mol._bas, mol._env, int3c2e)
    for i0, i1 in lib.prange(0, n_ext, blksize):
        fakemol = gto.fakemol_for_charges(cav_coords[i0:i1])
        v_nj = df.incore.aux_e2(mol, fakemol, intor=int3c2e, aosym='s2ij',
                                cintopt=cintopt)
        v_phi_e[:, i0:i1] = numpy.einsum('nx,xk->nk', tril_dm, v_nj)
    v_phi[:, outside] -= v_phi_e

    phi = -numpy.einsum('n,xn,jn,ijn->ijx', sph_weights, ylm_1sph, ui, v_phi)
    return phi[0] if single_dm else phi
def make_kpt(uniq_kptji_id, cholesky_j2c):
    '''Build and store the 'j3c' datasets for one unique k-point difference.

    This is a closure: uniq_kpts, uniq_inverse, kptjs, fused_cell, cell,
    mydf, fswap, feri, fuse, Gv, b, gxyz, Gvbase, mesh, ngrids, nao, naux,
    nsegs and log are all taken from the enclosing scope and are not
    defined in this block.

    Args:
        uniq_kptji_id : index into ``uniq_kpts``; every pair index ``ji``
            with ``uniq_inverse == uniq_kptji_id`` is processed.
        cholesky_j2c : 3-tuple ``(j2c, j2c_negative, j2ctag)``.  When
            ``j2ctag == 'CD'`` ``j2c`` is passed to
            ``scipy.linalg.solve_triangular(..., lower=True)``; otherwise it
            is multiplied from the left with ``lib.dot``.

    Side effects:
        Writes ``feri['j3c/<ji>/<istep>']`` (and ``feri['j3c-/<ji>/<istep>']``
        when ``j2c_negative`` is not None) and finally deletes
        ``fswap['j3c-junk/<ji>']`` for every processed pair.  Returns None.
    '''
    # kpt = kptj - kpti
    kpt = uniq_kpts[uniq_kptji_id]
    log.debug1('kpt = %s', kpt)
    # All (kpti, kptj) pairs that map onto this unique difference.
    adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
    adapted_kptjs = kptjs[adapted_ji_idx]
    nkptj = len(adapted_kptjs)
    log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

    j2c, j2c_negative, j2ctag = cholesky_j2c

    # ft_ao output on the grid Gv, combined by fuse() and scaled by
    # weighted_coulG; stored as separate C-contiguous real/imaginary parts.
    Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
    Gaux = fuse(Gaux)
    Gaux *= mydf.weighted_coulG(kpt, False, mesh)
    kLR = Gaux.T.real.copy('C')
    kLI = Gaux.T.imag.copy('C')

    if is_zero(kpt):  # kpti == kptj
        # Packed lower-triangular AO pairs can be used in this case.
        aosym = 's2'
        nao_pair = nao * (nao + 1) // 2

        if cell.dimension == 3:
            # vbar and the packed overlaps are consumed by load() below.
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
    else:
        aosym = 's1'
        nao_pair = nao**2

    mem_now = lib.current_memory()[0]
    log.debug2('memory = %s', mem_now)
    max_memory = max(2000, mydf.max_memory - mem_now)
    # nkptj for 3c-coulomb arrays plus 1 Lpq array
    buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                 nao_pair)
    shranges = _guess_shell_ranges(cell, buflen, aosym)
    # Actual largest number of AO-pair columns in any shell range.
    buflen = max([x[2] for x in shranges])
    # +1 for a pqkbuf
    if aosym == 's2':
        Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
    else:
        Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
    Gblksize = min(Gblksize, ngrids, 16384)

    def load(aux_slice):
        '''Read columns [col0:col1) of the 'j3c-junk' data for every pair.

        Returns two lists (real parts, imaginary parts), one entry per pair;
        the imaginary entry is None when kpt is zero and the pair's kptj is
        the gamma point.
        '''
        col0, col1 = aux_slice
        j3cR = []
        j3cI = []
        for k, idx in enumerate(adapted_ji_idx):
            # Stack the nsegs segments stored in fswap, then fuse them.
            v = [
                fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                for i in range(nsegs)
            ]
            v = fuse(numpy.vstack(v))
            if is_zero(kpt) and cell.dimension == 3:
                # Subtract vbar[i] * overlap for the rows with non-zero vbar.
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][col0:col1]
            j3cR.append(numpy.asarray(v.real, order='C'))
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                j3cI.append(None)
            else:
                j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None
        return j3cR, j3cI

    # Scratch storage reused for every (pair, grid block) below.
    pqkRbuf = numpy.empty(buflen * Gblksize)
    pqkIbuf = numpy.empty(buflen * Gblksize)
    # buf for ft_aopair
    buf = numpy.empty((nkptj, buflen * Gblksize), dtype=numpy.complex128)
    # Column offsets of each shell range; these become the load() tasks.
    cols = [sh_range[2] for sh_range in shranges]
    locs = numpy.append(0, numpy.cumsum(cols))
    tasks = zip(locs[:-1], locs[1:])
    for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
        bstart, bend, ncol = shranges[istep]
        log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                   len(shranges), bstart, bend, ncol)
        if aosym == 's2':
            shls_slice = (bstart, bend, 0, bend)
        else:
            shls_slice = (bstart, bend, 0, cell.nbas)

        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                                        gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                # Views on the scratch buffers holding the transposed
                # real and imaginary parts of the AO-pair data.
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # NOTE(review): the trailing lib.dot arguments look like
                # (alpha, out, beta), i.e. the products are accumulated in
                # place into j3cR[k] / j3cI[k] -- confirm against lib.dot.
                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k], 1)

        for k, ji in enumerate(adapted_ji_idx):
            # Recombine into a complex array unless only the real part exists.
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
                feri['j3c/%d/%d' % (ji, istep)] = v
            else:
                feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
        # Drop the references so the block arrays can be freed.
        j3cR = j3cI = None

    # The intermediate data in the swap file is no longer needed.
    for ji in adapted_ji_idx:
        del (fswap['j3c-junk/%d' % ji])
def dm_for_vj_tril(dm):
    '''Pack ``dm + dm^H`` in lower-triangular form with halved diagonal.

    Args:
        dm : square 2D density matrix.

    Returns:
        1D array from ``lib.pack_tril(dm + dm.T.conj())`` whose diagonal
        entries (packed positions ``k*(k+1)//2 + k``) are multiplied by 0.5.
    '''
    dmtril = lib.pack_tril(dm+dm.T.conj())
    # Compute the packed positions of the diagonal from dm itself rather than
    # from a free variable ``i`` of the enclosing scope, so the result cannot
    # silently change if ``i`` is rebound (or missing) outside this function.
    diag = numpy.arange(dm.shape[0])
    dmtril[diag*(diag+1)//2+diag] *= .5
    return dmtril
def grad_elec(mc_grad, mo_coeff=None, ci=None, atmlst=None, verbose=None):
    '''Electronic part of the CASCI nuclear gradients.

    Args:
        mc_grad : gradient object.  ``mc_grad.base`` is the underlying
            multi-configurational object ``mc``; ``mc_grad.mol``,
            ``mc_grad.max_memory``, ``mc_grad.hcore_generator`` and
            ``mc_grad.get_ovlp`` are used as well.
        mo_coeff : MO coefficients; defaults to ``mc._scf.mo_coeff``.
        ci : CI vector; defaults to ``mc.ci``.
        atmlst : atoms for which gradients are computed; defaults to
            ``range(mol.natm)``.
        verbose : forwarded to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``.

    Raises:
        AssertionError: if the numbers of alpha and beta electrons of the
            molecule differ.
    '''
    mc = mc_grad.base
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci

    time0 = logger.process_clock(), logger.perf_counter()
    log = logger.new_logger(mc_grad, verbose)
    mol = mc_grad.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]
    neleca, nelecb = mol.nelec
    assert (neleca == nelecb)
    # Occupied/virtual partition of the underlying mean-field reference.
    orbo = mo_coeff[:, :neleca]
    orbv = mo_coeff[:, neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nmo, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo, nmo))
    Imat[:, :nocc] = reduce(numpy.dot,
                            (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active columns are overwritten with the casdm1/casdm2 expressions.
    Imat[:, ncore:nocc] = reduce(numpy.dot,
                                 (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:, ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # Orbital-energy differences used as denominators for the
    # core/active and active/virtual blocks of zvec.
    ee = mo_energy[:, None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    zvec[:ncore, ncore:neleca] = Imat[:ncore, ncore:neleca] / -ee[:ncore, ncore:neleca]
    zvec[ncore:neleca, :ncore] = Imat[ncore:neleca, :ncore] / -ee[ncore:neleca, :ncore]
    zvec[nocc:, neleca:nocc] = Imat[nocc:, neleca:nocc] / -ee[nocc:, neleca:nocc]
    zvec[neleca:nocc, nocc:] = Imat[neleca:nocc, nocc:] / -ee[neleca:nocc, nocc:]

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:, :neleca] - Imat[:neleca, neleca:].T

    def fvind(x):
        # Response function handed to cphf.solve: maps a virtual-occupied
        # matrix to 2 * orbv^T . veff(dm + dm^T) . orbo.
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2

    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo, max_cycle=30)[0]
    zvec[neleca:, :neleca] = dm1resp

    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    # Rebuild zvec in the AO basis now that the virtual-occupied block is set.
    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :neleca], mo_coeff[:, :neleca].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mc._scf.get_veff(mol, zvec_ao), p1))

    Imat[:ncore, ncore:neleca] = 0
    Imat[ncore:neleca, :ncore] = 0
    Imat[nocc:, neleca:nocc] = 0
    Imat[neleca:nocc, nocc:] = 0
    Imat[neleca:, :neleca] = Imat[:neleca, neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mc_grad.hcore_generator(mol)
    s1 = mc_grad.get_ovlp(mol)

    # Positions of the diagonal elements in packed lower-triangular storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Number of AO functions handled per block of the second integral index.
    max_memory = mc_grad.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            # q0:q1 is the AO range of the current block (index j of eri1).
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                # eri1tmp has shape (p1-p0, nf, nao, nao) with indices i,j,k,l.
                eri1tmp = lib.unpack_tril(eri1[i].reshape((p1 - p0) * nf, -1))
                eri1tmp = eri1tmp.reshape(p1 - p0, nf, nao, nao)
                de[k, i] -= numpy.einsum('ijkl,ij,kl', eri1tmp,
                                         hf_dm1[p0:p1, q0:q1], zvec_ao) * 2
                de[k, i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1,
                                         zvec_ao[p0:p1, q0:q1]) * 2
                de[k, i] += numpy.einsum('ijkl,il,kj', eri1tmp, hf_dm1[p0:p1],
                                         zvec_ao[q0:q1])
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp, hf_dm1[q0:q1],
                                         zvec_ao[p0:p1])

                #:vhf1c, vhf1a = mc_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                # Coulomb-like terms: the block range q0:q1 belongs to index j
                # of eri1tmp, so it must slice the density carrying (i, j),
                # while the density carrying (k, l) is used in full.  Slicing
                # the (k, l) density with q0:q1 instead only has matching
                # shapes when a single block spans all AOs; for that case the
                # expressions below give the same numbers.
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_core,
                                         casci_dm1[p0:p1, q0:q1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp,
                                         dm_core[q0:q1], casci_dm1[p0:p1])
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_cas,
                                         dm_core[p0:p1, q0:q1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_cas[q0:q1],
                                         dm_core[p0:p1])
            eri1 = eri1tmp = None

        # Overlap-derivative contributions.
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], im1[:, p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], zeta[:, p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1],
                              vhf_s1occ[:, p0:p1]) * 2

    log.timer('CASCI nuclear gradients', *time0)
    return de
def __init__(self, cc, mo_coeff=None, method='incore'):
    '''Transform and sort the MO integrals used by the CCSD object ``cc``.

    Args:
        cc : coupled-cluster object.  ``mo_energy``, ``mo_coeff``,
            ``mo_occ``, ``frozen``, ``_scf``, ``mol``, ``max_memory``,
            ``stdout``, ``verbose``, ``nocc()`` and ``nmo()`` are read.
        mo_coeff : optional MO coefficients.  When given, the Fock matrix
            is rebuilt from the mean-field density instead of being taken
            as ``diag(mo_energy)``.
        method : ``'incore'`` requests the in-memory transformation, which
            is used when the AO integrals ``cc._scf._eri`` exist and the
            memory estimate fits (or ``cc.mol.incore_anyway`` is set).
            Otherwise the integrals are written to temporary HDF5 files.

    Attributes set:
        mo_coeff, fock, oooo, ooov, ovoo, oovv, ovov, ovvv, vvvv and, on
        the out-of-core path, the open HDF5 handles feri1 and feri2.
    '''
    # time.clock was removed in Python 3.8.  Keep using it where it exists
    # (unchanged behaviour) and fall back to time.process_time otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    cput0 = (cpu_clock(), time.time())
    # numpy.bool was removed from NumPy; the builtin bool is the same dtype.
    moidx = numpy.ones(cc.mo_energy.size, dtype=bool)
    if isinstance(cc.frozen, (int, numpy.integer)):
        moidx[:cc.frozen] = False
    elif len(cc.frozen) > 0:
        moidx[numpy.asarray(cc.frozen)] = False
    if mo_coeff is None:
        self.mo_coeff = mo_coeff = cc.mo_coeff[:,moidx]
        self.fock = numpy.diag(cc.mo_energy[moidx])
    else:  # If mo_coeff is not canonical orbital
        self.mo_coeff = mo_coeff = mo_coeff[:,moidx]
        dm = cc._scf.make_rdm1(cc.mo_coeff, cc.mo_occ)
        fockao = cc._scf.get_hcore() + cc._scf.get_veff(cc.mol, dm)
        self.fock = reduce(numpy.dot, (mo_coeff.T, fockao, mo_coeff))

    nocc = cc.nocc()
    nmo = cc.nmo()
    nvir = nmo - nocc
    mem_incore, mem_outcore, mem_basic = _mem_usage(nocc, nvir)
    mem_now = pyscf.lib.current_memory()[0]

    log = logger.Logger(cc.stdout, cc.verbose)
    # The in-core path reads cc._scf._eri, so it is only eligible when those
    # integrals exist; incore_anyway overrides the method/memory test only.
    have_ao_eri = cc._scf._eri is not None
    fits_in_memory = mem_incore + mem_now < cc.max_memory
    use_incore = have_ao_eri and ((method == 'incore' and fits_in_memory)
                                  or cc.mol.incore_anyway)
    if use_incore:
        eri1 = pyscf.ao2mo.incore.full(cc._scf._eri, mo_coeff)
        # Reference (unpacked) form of what the loops below compute:
        #:eri1 = pyscf.ao2mo.restore(1, eri1, nmo)
        #:self.oooo = eri1[:nocc,:nocc,:nocc,:nocc].copy()
        #:self.ooov = eri1[:nocc,:nocc,:nocc,nocc:].copy()
        #:self.ovoo = eri1[:nocc,nocc:,:nocc,:nocc].copy()
        #:self.oovv = eri1[:nocc,:nocc,nocc:,nocc:].copy()
        #:self.ovov = eri1[:nocc,nocc:,:nocc,nocc:].copy()
        #:ovvv = eri1[:nocc,nocc:,nocc:,nocc:].copy()
        #:self.ovvv = numpy.empty((nocc,nvir,nvir*(nvir+1)//2))
        #:for i in range(nocc):
        #:    for j in range(nvir):
        #:        self.ovvv[i,j] = lib.pack_tril(ovvv[i,j])
        #:self.vvvv = pyscf.ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)
        nvir_pair = nvir * (nvir+1) // 2
        self.oooo = numpy.empty((nocc,nocc,nocc,nocc))
        self.ooov = numpy.empty((nocc,nocc,nocc,nvir))
        self.ovoo = numpy.empty((nocc,nvir,nocc,nocc))
        self.oovv = numpy.empty((nocc,nocc,nvir,nvir))
        self.ovov = numpy.empty((nocc,nvir,nocc,nvir))
        self.ovvv = numpy.empty((nocc,nvir,nvir_pair))
        self.vvvv = numpy.empty((nvir_pair,nvir_pair))
        # ij walks through the packed (i >= j) rows of eri1.
        ij = 0
        outbuf = numpy.empty((nmo,nmo,nmo))
        for i in range(nocc):
            buf = _ccsd.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            for j in range(i+1):
                self.oooo[i,j] = self.oooo[j,i] = buf[j,:nocc,:nocc]
                self.ooov[i,j] = self.ooov[j,i] = buf[j,:nocc,nocc:]
                self.oovv[i,j] = self.oovv[j,i] = buf[j,nocc:,nocc:]
            ij += i + 1
        # ij1 counts the packed virtual-virtual rows written to vvvv.
        ij1 = 0
        for i in range(nocc,nmo):
            buf = _ccsd.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            self.ovoo[:,i-nocc] = buf[:nocc,:nocc,:nocc]
            self.ovov[:,i-nocc] = buf[:nocc,:nocc,nocc:]
            for j in range(nocc):
                self.ovvv[j,i-nocc] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
            for j in range(nocc, i+1):
                self.vvvv[ij1] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
                ij1 += 1
            ij += i + 1
    else:
        cput1 = cpu_clock(), time.time()
        _tmpfile1 = tempfile.NamedTemporaryFile()
        _tmpfile2 = tempfile.NamedTemporaryFile()
        # Open for writing explicitly (as done for feri2 below): without a
        # mode, h5py >= 3 opens read-only and create_dataset would fail.
        self.feri1 = h5py.File(_tmpfile1.name, 'w')
        orbo = mo_coeff[:,:nocc]
        orbv = mo_coeff[:,nocc:]
        nvpair = nvir * (nvir+1) // 2
        self.oooo = self.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
        self.ooov = self.feri1.create_dataset('ooov', (nocc,nocc,nocc,nvir), 'f8')
        self.ovoo = self.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8')
        self.oovv = self.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8')
        self.ovov = self.feri1.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8')
        self.ovvv = self.feri1.create_dataset('ovvv', (nocc,nvir,nvpair), 'f8')

        self.feri2 = h5py.File(_tmpfile2.name, 'w')
        pyscf.ao2mo.full(cc.mol, orbv, self.feri2, verbose=log)
        self.vvvv = self.feri2['eri_mo']
        cput1 = log.timer_debug1('transforming vvvv', *cput1)

        # Scratch file for the (o,p|p,p) integrals; emptied once sorted.
        tmpfile3 = tempfile.NamedTemporaryFile()
        with h5py.File(tmpfile3.name, 'w') as feri:
            pyscf.ao2mo.general(cc.mol, (orbo,mo_coeff,mo_coeff,mo_coeff),
                                feri, verbose=log)
            cput1 = log.timer_debug1('transforming oppp', *cput1)
            eri1 = feri['eri_mo']
            outbuf = numpy.empty((nmo,nmo,nmo))
            for i in range(nocc):
                buf = _ccsd.unpack_tril(_cp(eri1[i*nmo:(i+1)*nmo]), out=outbuf)
                self.oooo[i] = buf[:nocc,:nocc,:nocc]
                self.ooov[i] = buf[:nocc,:nocc,nocc:]
                self.ovoo[i] = buf[nocc:,:nocc,:nocc]
                self.oovv[i] = buf[:nocc,nocc:,nocc:]
                self.ovov[i] = buf[nocc:,:nocc,nocc:]
                self.ovvv[i] = _ccsd.pack_tril(_cp(buf[nocc:,nocc:,nocc:]))
                cput1 = log.timer_debug1('sorting %d'%i, *cput1)
            for key in feri.keys():
                del(feri[key])
    log.timer('CCSD integral transformation', *cput0)
def aux_e1(cell, auxcell, erifile, intor='int3c2e', aosym='s2ij', comp=None,
           kptij_lst=None, dataname='eri_mo', shls_slice=None, max_memory=2000,
           verbose=0):
    r'''3-center AO integrals (L|ij) with double lattice sum:
    \sum_{lm} (L[0]|i[l]j[m]), where L is the auxiliary basis.
    Three-index integral tensor (kptij_idx, naux, nao_pair) or four-index
    integral tensor (kptij_idx, comp, naux, nao_pair) are stored on disk.

    Args:
        erifile : an open ``h5py.Group`` (used as is and left open for the
            caller) or a file name.  An existing HDF5 file is opened in
            append mode, anything else is created/overwritten.
        kptij_lst : (*,2,3) array
            A list of (kpti, kptj).  Defaults to a single all-zero pair.
        dataname : name of the group that receives one dataset per k-point
            pair (``'<dataname>/<k>'``); the k-point list itself is stored
            under ``'<dataname>-kptij'``.  Existing entries are replaced.
        shls_slice : 6-tuple of shell ranges (i0, i1, j0, j1, aux0, aux1);
            defaults to all shells of ``cell`` and ``auxcell``.
        max_memory : used to size the block of auxiliary functions that
            is processed at a time.

    Returns:
        ``erifile``, unchanged.
    '''
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor), comp)

    if isinstance(erifile, h5py.Group):
        feri = erifile
    elif h5py.is_hdf5(erifile):
        feri = h5py.File(erifile, 'a')
    else:
        feri = h5py.File(erifile, 'w')

    if dataname in feri:
        del (feri[dataname])
    if dataname + '-kptij' in feri:
        del (feri[dataname + '-kptij'])

    if kptij_lst is None:
        kptij_lst = numpy.zeros((1, 2, 3))
    feri[dataname + '-kptij'] = kptij_lst

    if shls_slice is None:
        shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)

    ao_loc = cell.ao_loc_nr()
    aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor)[:shls_slice[5] + 1]
    ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
    nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
    naux = aux_loc[shls_slice[5]] - aux_loc[shls_slice[4]]
    nkptij = len(kptij_lst)

    # Number of AO pairs with (nii) and without (nij) triangular packing.
    nii = (ao_loc[shls_slice[1]] * (ao_loc[shls_slice[1]] + 1) // 2 -
           ao_loc[shls_slice[0]] * (ao_loc[shls_slice[0]] + 1) // 2)
    nij = ni * nj

    kpti = kptij_lst[:, 0]
    kptj = kptij_lst[:, 1]
    # Pairs with kpti == kptj (within tolerance) may use packed storage.
    aosym_ks2 = abs(kpti - kptj).sum(axis=1) < KPT_DIFF_TOL
    j_only = numpy.all(aosym_ks2)
    #aosym_ks2 &= (aosym[:2] == 's2' and shls_slice[:2] == shls_slice[2:4])
    aosym_ks2 &= aosym[:2] == 's2'
    for k, kptij in enumerate(kptij_lst):
        key = '%s/%d' % (dataname, k)
        if gamma_point(kptij):
            dtype = 'f8'
        else:
            dtype = 'c16'
        if aosym_ks2[k]:
            nao_pair = nii
        else:
            nao_pair = nij
        if comp == 1:
            shape = (naux, nao_pair)
        else:
            shape = (comp, naux, nao_pair)
        feri.create_dataset(key, shape, dtype)
    if naux == 0:
        # Nothing to compute.  Only close the handle if it was opened here;
        # a caller-supplied h5py.Group stays open, as on the normal exit.
        if not isinstance(erifile, h5py.Group):
            feri.close()
        return erifile

    if j_only and aosym[:2] == 's2':
        assert (shls_slice[2] == 0)
        nao_pair = nii
    else:
        nao_pair = nij

    if gamma_point(kptij_lst):
        dtype = numpy.double
    else:
        dtype = numpy.complex128

    # Split the auxiliary shells into blocks that fit the memory budget.
    buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * ni * nj * comp)))
    auxdims = (aux_loc[shls_slice[4] + 1:shls_slice[5] + 1] -
               aux_loc[shls_slice[4]:shls_slice[5]])
    auxranges = balance_segs(auxdims, buflen)
    buflen = max([x[2] for x in auxranges])
    buf = numpy.empty(nkptij * comp * ni * nj * buflen, dtype=dtype)
    buf1 = numpy.empty(ni * nj * buflen, dtype=dtype)

    int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst)

    # naux0 is the row offset of the current block in the output datasets.
    naux0 = 0
    for istep, auxrange in enumerate(auxranges):
        sh0, sh1, nrow = auxrange
        sub_slice = (shls_slice[0], shls_slice[1],
                     shls_slice[2], shls_slice[3],
                     shls_slice[4] + sh0, shls_slice[4] + sh1)
        mat = numpy.ndarray((nkptij, comp, nao_pair, nrow), dtype=dtype,
                            buffer=buf)
        mat = int3c(sub_slice, mat)

        for k, kptij in enumerate(kptij_lst):
            h5dat = feri['%s/%d' % (dataname, k)]
            for icomp, v in enumerate(mat[k]):
                # (nao_pair, nrow) -> (nrow, nao_pair) to match the dataset.
                v = lib.transpose(v, out=buf1)
                if gamma_point(kptij):
                    v = v.real
                if aosym_ks2[k] and v.shape[1] == ni**2:
                    # Square AO-pair block for a pair stored in packed form.
                    v = lib.pack_tril(v.reshape(-1, ni, ni))
                if comp == 1:
                    h5dat[naux0:naux0 + nrow] = v
                else:
                    h5dat[icomp, naux0:naux0 + nrow] = v
        naux0 += nrow

    if not isinstance(erifile, h5py.Group):
        feri.close()
    return erifile