def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           mf_grad=None, d1=None, d2=None, verbose=logger.INFO):
    """Evaluate analytical nuclear gradients for a CCSD object.

    Args:
        mycc: CCSD object.  ``t1``, ``t2``, ``l1`` and ``l2`` default to the
            attributes of the same name stored on it.
        t1, t2, l1, l2: amplitudes / lambda amplitudes (optional).
        eris: optional integrals object.  When given, its ``fock`` matrix
            must be diagonal to within 1e-3; it is also forwarded to
            ``_response_dm1``.
        atmlst: indices of the atoms to differentiate; all atoms of
            ``mycc.mol`` when omitted.
        mf_grad: SCF gradient object; ``mycc._scf.nuc_grad_method()`` when
            omitted.
        d1: ``(doo, dov, dvo, dvv)`` rdm1 intermediates; built with
            ``ccsd_rdm._gamma1_intermediates`` when omitted.
        d2: rdm2 intermediates; built with ``ccsd_rdm._gamma2_outcore`` when
            omitted.
        verbose: verbosity level handed to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)`` holding the gradient, including
        the term returned by ``mf_grad.grad_nuc``.

    Raises:
        RuntimeError: if ``eris.fock`` has off-diagonal elements above 1e-3.
    """
    if eris is not None:
        if abs(eris.fock - numpy.diag(eris.fock.diagonal())).max() > 1e-3:
            raise RuntimeError(
                'CCSD gradients does not support NHF (non-canonical HF)')

    if t1 is None:
        t1 = mycc.t1
    if t2 is None:
        t2 = mycc.t2
    if l1 is None:
        l1 = mycc.l1
    if l2 is None:
        l2 = mycc.l2
    if mf_grad is None:
        mf_grad = mycc._scf.nuc_grad_method()

    log = logger.new_logger(mycc, verbose)
    # time.clock() was removed in Python 3.8; use process_time() when the
    # interpreter provides it and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()

    log.debug('Build ccsd rdm1 intermediates')
    if d1 is None:
        d1 = ccsd_rdm._gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ccsd rdm2 intermediates')
    # Temporary HDF5 file: receives the AO-basis 'dm2' dataset and the
    # per-atom 'vhf1' dataset created below.
    fdm2 = lib.H5TmpFile()
    if d2 is None:
        d2 = ccsd_rdm._gamma2_outcore(mycc, t1, t2, l1, l2, fdm2, True)
    time1 = log.timer_debug1('rdm2 intermediates', *time1)

    mol = mycc.mol
    mo_coeff = mycc.mo_coeff
    mo_energy = mycc._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mycc.mo_occ > 0)
    # "No frozen orbitals" is spelled either None or the integer 0.  Compare
    # by value (not identity) and only for integer-like inputs, so that a
    # list/array of frozen orbital indices is never compared element-wise.
    frozen = mycc.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mycc.get_frozen_mask(), mycc.mo_occ)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    # Roughly, dm2*2 is computed in _rdm2_mo2ao
    mo_active = mo_coeff[:, numpy.hstack((OA, VA))]
    _rdm2_mo2ao(mycc, d2, mo_active, fdm2)  # transform the active orbitals
    time1 = log.timer_debug1('MO->AO transformation', *time1)
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements in a packed lower-triangular index.
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    de = numpy.zeros((len(atmlst), 3))
    Imat = numpy.zeros((nao, nao))
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst), 3, nao, nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory * .9e6 / 8 / (nao**3 * 2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((3, nao, nao))
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = _load_block_tril(fdm2['dm2'], ip0, ip1, nao)
            dm2buf[:, :, diagidx] *= .5
            shls_slice = (b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf, nao, -1),
                               dm2buf)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3, nf, nao, -1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            dm2buf = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf * nao, -1))
                eri1tmp = eri1tmp.reshape(nf, nao, nao, nao)
                vhf[i] += numpy.einsum('ijkl,ij->kl', eri1tmp,
                                       hf_dm1[ip0:ip1])
                vhf[i] -= numpy.einsum('ijkl,il->kj', eri1tmp,
                                       hf_dm1[ip0:ip1]) * .5
                vhf[i, ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp,
                                                hf_dm1)
                vhf[i, ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp,
                                                hf_dm1) * .5
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia,
                  mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d' % ia, *time1)

    Imat = reduce(numpy.dot, (mo_coeff.T, Imat, mycc._scf.get_ovlp(),
                              mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo, nmo))
    if with_frozen:
        # Frozen/active blocks come from Imat divided by orbital-energy
        # differences.
        dco = Imat[OF[:, None], OA] / (mo_energy[OF, None] - mo_energy[OA])
        dfv = Imat[VF[:, None], VA] / (mo_energy[VF, None] - mo_energy[VA])
        dm1mo[OA[:, None], OA] = doo + doo.T
        dm1mo[OF[:, None], OA] = dco
        dm1mo[OA[:, None], OF] = dco.T
        dm1mo[VA[:, None], VA] = dvv + dvv.T
        dm1mo[VF[:, None], VA] = dfv
        dm1mo[VA[:, None], VF] = dfv.T
    else:
        dm1mo[:nocc, :nocc] = doo + doo.T
        dm1mo[nocc:, nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mycc._scf.get_veff(mycc.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:, nocc:].T, vhf, mo_coeff[:, :nocc]))
    Xvo += Imat[:nocc, nocc:].T - Imat[nocc:, :nocc]

    dm1mo += _response_dm1(mycc, Xvo, eris)
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imat[nocc:, :nocc] = Imat[:nocc, nocc:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mycc._scf.get_veff(mol, dm1 + dm1.T),
                                   p1))
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1p = hf_dm1 + dm1 * 2
    dm1 += hf_dm1
    zeta += mf_grad.make_rdm1e(mo_energy, mo_coeff, mycc.mo_occ)

    for k, ia in enumerate(atmlst):
        # NOTE: p1 is rebound here to an AO offset; the projector stored in
        # p1 above is no longer needed at this point.
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM, contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1])
        de[k] -= numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1])
        # -vhf[s_ij[1]], contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1],
                              vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k], dm1p)

    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mycc.__class__.__name__, *time0)
    return de
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None,
           verbose=None):
    """Evaluate nuclear gradients for a CASSCF object.

    Args:
        mc: CASSCF object; must have ``frozen`` set to None.
        mo_coeff: orbital coefficients; ``mc.mo_coeff`` when omitted.
        ci: CI vector passed to ``mc.fcisolver.make_rdm12``; ``mc.ci`` when
            omitted.
        atmlst: indices of the atoms to differentiate; all atoms when
            omitted.
        mf_grad: SCF gradient object supplying ``get_veff``,
            ``hcore_generator``, ``get_ovlp`` and ``grad_nuc``;
            ``mc._scf.nuc_grad_method()`` when omitted.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(atmlst), 3)``, nuclear-repulsion term included.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    """
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active-orbital columns are overwritten with the density-weighted
    # expression.
    gfock[:, ncore:nocc] = reduce(numpy.dot,
                                  (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(numpy.dot, (mo_occ, (gfock + gfock.T) * .5, mo_occ.T))
    # Drop references to the large intermediates before the gradient loop.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements in a packed lower-triangular index.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2),
                                mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # Clamp: at least 2 (also covers a negative memory estimate), at most nao.
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None

        de[k] += numpy.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:, p0:p1],
                              dm_core[p0:p1]) * 2

    # Take the nuclear term from the same gradient object that supplied the
    # electronic derivative integrals, so a caller-provided mf_grad is
    # honoured consistently.
    de += mf_grad.grad_nuc(mol, atmlst)
    return de
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None,
           verbose=None):
    """Evaluate nuclear gradients for a CASCI object on closed-shell HF.

    The CASCI orbitals are not variational, so an orbital-response vector
    (``zvec``) is built from ``Imat`` and orbital-energy differences, and its
    virtual-occupied block is obtained from ``cphf.solve``.

    Args:
        mc: CASCI object.
        mo_coeff: orbital coefficients; ``mc._scf.mo_coeff`` when omitted.
        ci: CI vector (must be a ``numpy.ndarray``); ``mc.ci`` when omitted.
        atmlst: indices of the atoms to differentiate; all atoms when
            omitted.
        mf_grad: SCF gradient object; ``mc._scf.nuc_grad_method()`` when
            omitted.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(atmlst), 3)``, nuclear-repulsion term included.

    Raises:
        AssertionError: if ``ci`` is not an ndarray or if the molecule has
            different numbers of alpha and beta electrons.
    """
    if mo_coeff is None:
        mo_coeff = mc._scf.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    assert (isinstance(ci, numpy.ndarray))

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    neleca, nelecb = mol.nelec
    assert (neleca == nelecb)
    orbo = mo_coeff[:, :neleca]
    orbv = mo_coeff[:, neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas),
                        compact=False)
    aapa = aapa.reshape(ncas, ncas, nmo, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo, nmo))
    Imat[:, :nocc] = reduce(numpy.dot,
                            (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    Imat[:, ncore:nocc] = reduce(numpy.dot,
                                 (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:, ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # Blocks of zvec between the core / HF-occupied-active / HF-virtual-active
    # / external orbital ranges are Imat divided by -(e_p - e_q).
    ee = mo_energy[:, None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    zvec[:ncore, ncore:neleca] = (Imat[:ncore, ncore:neleca] /
                                  -ee[:ncore, ncore:neleca])
    zvec[ncore:neleca, :ncore] = (Imat[ncore:neleca, :ncore] /
                                  -ee[ncore:neleca, :ncore])
    zvec[nocc:, neleca:nocc] = (Imat[nocc:, neleca:nocc] /
                                -ee[nocc:, neleca:nocc])
    zvec[neleca:nocc, nocc:] = (Imat[neleca:nocc, nocc:] /
                                -ee[neleca:nocc, nocc:])

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:, :neleca] - Imat[:neleca, neleca:].T

    def fvind(x):
        """Apply the induced-potential operator to a virt-occ trial vector."""
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2

    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo,
                         max_cycle=30)[0]
    zvec[neleca:, :neleca] = dm1resp

    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :neleca], mo_coeff[:, :neleca].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mc._scf.get_veff(mol, zvec_ao), p1))

    # The blocks already absorbed into zvec are zeroed; the virt-occ block is
    # replaced by the transpose of the occ-virt block.
    Imat[:ncore, ncore:neleca] = 0
    Imat[ncore:neleca, :ncore] = 0
    Imat[nocc:, neleca:nocc] = 0
    Imat[neleca:nocc, nocc:] = 0
    Imat[neleca:, :neleca] = Imat[:neleca, neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements in a packed lower-triangular index.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2),
                                mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # Clamp: at least 2 (also covers a negative memory estimate), at most nao.
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        # NOTE: p1 is rebound here to an AO offset; the projector stored in
        # p1 above has already been consumed.
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                eri1tmp = lib.unpack_tril(
                    eri1[i].reshape((p1 - p0) * nf, -1))
                eri1tmp = eri1tmp.reshape(p1 - p0, nf, nao, nao)
                # Coulomb- and exchange-type contractions of the HF density
                # with the response density.
                de[k, i] -= numpy.einsum('ijkl,ij,kl', eri1tmp,
                                         hf_dm1[p0:p1, q0:q1], zvec_ao) * 2
                de[k, i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1,
                                         zvec_ao[p0:p1, q0:q1]) * 2
                de[k, i] += numpy.einsum('ijkl,il,kj', eri1tmp,
                                         hf_dm1[p0:p1], zvec_ao[q0:q1])
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp,
                                         hf_dm1[q0:q1], zvec_ao[p0:p1])

                # The four contractions below presumably stand in for the
                # get_veff-based form kept here for reference:
                #:vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp,
                                         dm_core[q0:q1],
                                         casci_dm1[p0:p1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp,
                                         dm_core[q0:q1], casci_dm1[p0:p1])
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp,
                                         dm_cas[q0:q1], dm_core[p0:p1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp,
                                         dm_cas[q0:q1], dm_core[p0:p1])
            eri1 = eri1tmp = None

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], im1[:, p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], zeta[:, p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1],
                              vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1],
                              vhf_s1occ[:, p0:p1]) * 2

    # Take the nuclear term from the same gradient object that supplied the
    # electronic derivative integrals, so a caller-provided mf_grad is
    # honoured consistently.
    de += mf_grad.grad_nuc(mol, atmlst)
    return de
def grad_elec(mc_grad, mo_coeff=None, ci=None, atmlst=None, verbose=None):
    """Evaluate the electronic part of the CASSCF nuclear gradient.

    No nuclear-repulsion term is added in this function.

    Args:
        mc_grad: gradient object; ``mc_grad.base`` is the CASSCF object and
            ``mc_grad`` itself supplies ``mol``, ``max_memory``, ``get_jk``,
            ``hcore_generator`` and ``get_ovlp``.
        mo_coeff: orbital coefficients; ``mc.mo_coeff`` when omitted.
        ci: CI vector passed to ``mc.fcisolver.make_rdm12``; ``mc.ci`` when
            omitted.
        atmlst: indices of the atoms to differentiate; all atoms when
            omitted.
        verbose: verbosity level handed to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    """
    mc = mc_grad.base
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mc.frozen is not None:
        raise NotImplementedError

    # time.clock() was removed in Python 3.8; use process_time() when the
    # interpreter provides it and fall back to clock() on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    log = logger.new_logger(mc_grad, verbose)
    mol = mc_grad.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2

    # Necessary kludge because gfock isn't zero in occ-virt space in SA-CASSCF
    # Among many other potential applications!
    # When the tag is present and truthy, the "occupied" range used for
    # gfock is extended to all orbitals.
    if getattr(mc, '_tag_gfock_ov_nonzero', False):
        nocc = nmo

    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:ncore+ncas]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH flag: this is one of my kludges
    # It would be better to just pass the ERIS object used in orbital optimization
    # But I am too lazy at the moment
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nocc,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Only the core and active columns are filled; any remaining columns
    # (present when nocc was extended to nmo) stay zero.
    gfock = numpy.zeros((nocc, nocc))
    gfock[:,:ncore] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_core)) * 2
    gfock[:,ncore:ncore+ncas] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:,ncore:ncore+ncas] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(numpy.dot, (mo_occ, (gfock+gfock.T)*.5, mo_occ.T))
    # Drop references to the large intermediates before the gradient loop.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vj, vk = mc_grad.get_jk(mol, (dm_core, dm_cas))
    vhf1c, vhf1a = vj - vk * .5
    hcore_deriv = mc_grad.hcore_generator(mol)
    s1 = mc_grad.get_ovlp(mol)

    # Positions of the diagonal elements in a packed lower-triangular index.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    max_memory = mc_grad.max_memory - lib.current_memory()[0]
    # MRH: this originally implied that the memory footprint would be max(p1-p0) * max(q1-q0) * nao_pair
    # In fact, that's the size of dm2_ao AND EACH COMPONENT of the differentiated eris
    # So the actual memory footprint is 4 times that!
    blksize = int(max_memory*.9e6/8 / (4*(aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    # Clamp: at least 2 (also covers a negative memory estimate), at most nao.
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2

    log.timer('CASSCF nuclear gradients', *time0)
    return de
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None,
           verbose=None):
    """Brute-force CASCI nuclear gradient using explicit orbital derivatives.

    The first atom loop accumulates the derivative-integral terms in the AO
    basis.  The second loop builds, for every atom and Cartesian direction,
    a first-order orbital coefficient matrix ``c1`` (its virtual-occupied
    block from a dense linear solve against the matrix returned by the
    nested ``hess``) and contracts it with the full MO-basis one- and
    two-particle density matrices.  Full ``nmo**4`` / ``nao**4`` arrays are
    held in memory.

    Args:
        mc: CASCI object.
        mo_coeff: orbital coefficients; ``mc._scf.mo_coeff`` when omitted.
        ci: CI vector passed to ``mc.fcisolver.make_rdm12``; ``mc.ci`` when
            omitted.
        atmlst: ignored -- the gradient is always evaluated for all atoms.
        mf_grad: SCF gradient object; ``mc._scf.nuc_grad_method()`` when
            omitted.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(mol.natm, 3)``, nuclear-repulsion term included.
    """
    if mo_coeff is None:
        mo_coeff = mc._scf.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    hcore_deriv = mf_grad.hcore_generator(mol)
    # NOTE(review): this s1 is rebound below before it is ever read; its
    # only consumer in the first loop is commented out.
    s1 = mf_grad.get_ovlp(mol)

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # Use the resolved ``ci`` so that an explicitly passed CI vector is
    # honoured (previously mc.ci was always used here).
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(numpy.dot,
                                  (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    # NOTE(review): dme0 is only referenced by the commented-out line in the
    # first atom loop.
    dme0 = reduce(numpy.dot, (mo_occ, (gfock + gfock.T) * .5, mo_occ.T))
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))

    # Positions of the diagonal elements in a packed lower-triangular index.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2),
                                mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    # casdm2 is kept alive: it is needed for the full dm2 built below.
    #casdm2 = casdm2_cc = None

    # The caller's atmlst is deliberately overridden: the second atom loop
    # below indexes ``de`` by absolute atom number.
    atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        #de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None

        de[k] += numpy.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:, p0:p1],
                              dm_core[p0:p1]) * 2

    # Full MO-basis two-particle density matrix: closed-shell core-core
    # pattern, core-active blocks from casdm1, active block from casdm2.
    dm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    for i in range(ncore):
        for j in range(ncore):
            dm2[i, i, j, j] += 4
            dm2[i, j, j, i] -= 2
        dm2[i, i, ncore:nocc, ncore:nocc] = casdm1 * 2
        dm2[ncore:nocc, ncore:nocc, i, i] = casdm1 * 2
        dm2[i, ncore:nocc, ncore:nocc, i] = -casdm1
        dm2[ncore:nocc, i, i, ncore:nocc] = -casdm1
    dm2[ncore:nocc, ncore:nocc, ncore:nocc, ncore:nocc] = casdm2

    # NOTE(review): eri0 and Imat are not read anywhere below.
    eri0 = ao2mo.restore(1, ao2mo.full(mc._scf._eri, mo_coeff), nmo)
    Imat = numpy.einsum('pjkl,qjkl->pq', eri0, dm2)

    # From here on dm1 is the MO-basis one-particle density matrix.
    dm1 = numpy.zeros((nmo, nmo))
    for i in range(ncore):
        dm1[i, i] = 2
    dm1[ncore:nocc, ncore:nocc] = casdm1

    neleca, nelecb = mol.nelec

    h1 = -(mol.intor('int1e_ipkin', comp=3) + mol.intor('int1e_ipnuc', comp=3))
    s1 = -mol.intor('int1e_ipovlp', comp=3)
    eri1 = mol.intor('int2e_ip1', comp=3).reshape(3, nao, nao, nao, nao)
    # Transform the last three AO indices to the MO basis; the
    # differentiated index is transformed per atom inside the loop.
    eri1 = numpy.einsum('xipkl,pj->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijpl,pk->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijkp,pl->xijkl', eri1, mo_coeff)
    h0 = reduce(numpy.dot, (mo_coeff.T, mc._scf.get_hcore(), mo_coeff))
    g0 = ao2mo.restore(1, ao2mo.full(mol, mo_coeff), nmo)

    def hess():
        """Return minus the dense virt-occ x virt-occ matrix used in the solve."""
        nocc = mol.nelectron // 2
        nvir = nmo - nocc
        eri_mo = g0
        eai = lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
        h = eri_mo[nocc:, :nocc, nocc:, :nocc] * 4
        h -= numpy.einsum('cdlk->ckdl', eri_mo[nocc:, nocc:, :nocc, :nocc])
        h -= numpy.einsum('cldk->ckdl', eri_mo[nocc:, :nocc, nocc:, :nocc])
        for a in range(nvir):
            for i in range(nocc):
                h[a, i, a, i] += eai[a, i]
        return -h.reshape(nocc * nvir, -1)

    hh = hess()
    ee = mo_energy[:, None] - mo_energy

    for k, (sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()):
        mol.set_rinv_origin(mol.atom_coord(k))
        vrinv = -mol.atom_charge(k) * mol.intor('int1e_iprinv', comp=3)

        # 2e AO integrals dot 2pdm
        for i in range(3):
            g1 = numpy.einsum('pjkl,pi->ijkl', eri1[i, p0:p1],
                              mo_coeff[p0:p1])
            g1 = g1 + g1.transpose(1, 0, 2, 3)
            g1 = g1 + g1.transpose(2, 3, 0, 1)
            g1 *= -1
            hx = (numpy.einsum('pq,pi,qj->ij', h1[i, p0:p1], mo_coeff[p0:p1],
                               mo_coeff) +
                  reduce(numpy.dot, (mo_coeff.T, vrinv[i], mo_coeff)))
            hx = hx + hx.T
            sx = numpy.einsum('pq,pi,qj->ij', s1[i, p0:p1], mo_coeff[p0:p1],
                              mo_coeff)
            sx = sx + sx.T

            fij = (hx[:neleca, :neleca]
                   - numpy.einsum('ij,j->ij', sx[:neleca, :neleca],
                                  mo_energy[:neleca])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[:neleca, :neleca, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[:neleca, :neleca, :neleca, :neleca])
                   + numpy.einsum('ijkk->ij',
                                  g1[:neleca, :neleca, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[:neleca, :neleca, :neleca, :neleca]))

            fab = (hx[neleca:, neleca:]
                   - numpy.einsum('ij,j->ij', sx[neleca:, neleca:],
                                  mo_energy[neleca:])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[neleca:, neleca:, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, neleca:])
                   + numpy.einsum('ijkk->ij',
                                  g1[neleca:, neleca:, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[neleca:, :neleca, :neleca, neleca:]))

            fai = (hx[neleca:, :neleca]
                   - numpy.einsum('ai,i->ai', sx[neleca:, :neleca],
                                  mo_energy[:neleca])
                   - numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, :neleca]) * 2
                   + numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                  g0[neleca:, :neleca, :neleca, :neleca])
                   + numpy.einsum('ijkk->ij',
                                  g1[neleca:, :neleca, :neleca, :neleca]) * 2
                   - numpy.einsum('ikkj->ij',
                                  g1[neleca:, :neleca, :neleca, :neleca]))

            # First-order orbital coefficients: diagonal blocks from the
            # overlap derivative, virt-occ block from the linear solve.
            c1 = numpy.zeros((nmo, nmo))
            c1[:neleca, :neleca] = -.5 * sx[:neleca, :neleca]
            c1[neleca:, neleca:] = -.5 * sx[neleca:, neleca:]
            cvo1 = numpy.linalg.solve(hh, fai.ravel()).reshape(-1, neleca)
            cov1 = -(sx[neleca:, :neleca] + cvo1).T
            c1[neleca:, :neleca] = cvo1
            c1[:neleca, neleca:] = cov1

            v1 = numpy.einsum('pqai,ai->pq', g0[:, :, neleca:, :neleca],
                              cvo1) * 4
            v1 -= numpy.einsum('paiq,ai->pq', g0[:, neleca:, :neleca, :],
                               cvo1)
            v1 -= numpy.einsum('piaq,ai->pq', g0[:, :neleca, neleca:, :],
                               cvo1)
            fij += v1[:neleca, :neleca]
            fab += v1[neleca:, neleca:]

            # Core/active and active/external blocks inside the HF occupied
            # and virtual spaces are overwritten using energy denominators.
            c1[:ncore, ncore:neleca] = (-fij[:ncore, ncore:] /
                                        ee[:ncore, ncore:neleca])
            c1[ncore:neleca, :ncore] = (-fij[ncore:, :ncore] /
                                        ee[ncore:neleca, :ncore])
            m = nocc - neleca
            c1[nocc:, neleca:nocc] = -fab[m:, :m] / ee[nocc:, neleca:nocc]
            c1[neleca:nocc, nocc:] = -fab[:m, m:] / ee[neleca:nocc, nocc:]

            h0c1 = h0.dot(c1)
            h0c1 = h0c1 + h0c1.T
            g0c1 = numpy.einsum('pjkl,pi->ijkl', g0, c1)
            g0c1 = g0c1 + g0c1.transpose(1, 0, 2, 3)
            g0c1 = g0c1 + g0c1.transpose(2, 3, 0, 1)

            de[k, i] += numpy.einsum('ij,ji', h0c1, dm1)
            de[k, i] += numpy.einsum('ijkl,jilk', g0c1, dm2) * .5

    de += rhf_grad.grad_nuc(mol)
    return de
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    """Evaluate nuclear gradients for a CASCI object on closed-shell HF.

    An orbital-response vector (``zvec``) is built from ``Imat`` and
    orbital-energy differences; its virtual-occupied block is obtained from
    ``cphf.solve``.

    Args:
        mc: CASCI object.
        mo_coeff: orbital coefficients; ``mc._scf.mo_coeff`` when omitted.
        ci: CI vector (must be a ``numpy.ndarray``); ``mc.ci`` when omitted.
        atmlst: indices of the atoms to differentiate; all atoms when
            omitted.
        mf_grad: SCF gradient object; ``mc._scf.nuc_grad_method()`` when
            omitted.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(atmlst), 3)``, including the term returned by
        ``mf_grad.grad_nuc``.

    Raises:
        AssertionError: if ``ci`` is not an ndarray or if the molecule has
            different numbers of alpha and beta electrons.
    """
    if mo_coeff is None:
        mo_coeff = mc._scf.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    assert(isinstance(ci, numpy.ndarray))

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    mo_energy = mc._scf.mo_energy

    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]

    neleca, nelecb = mol.nelec
    assert(neleca == nelecb)
    orbo = mo_coeff[:,:neleca]
    orbv = mo_coeff[:,neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nmo,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo,nmo))
    Imat[:,:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    Imat[:,ncore:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:,ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # Blocks of zvec between the core / HF-occupied-active / HF-virtual-active
    # / external orbital ranges are Imat divided by -(e_p - e_q).
    ee = mo_energy[:,None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    zvec[:ncore,ncore:neleca] = Imat[:ncore,ncore:neleca] / -ee[:ncore,ncore:neleca]
    zvec[ncore:neleca,:ncore] = Imat[ncore:neleca,:ncore] / -ee[ncore:neleca,:ncore]
    zvec[nocc:,neleca:nocc] = Imat[nocc:,neleca:nocc] / -ee[nocc:,neleca:nocc]
    zvec[neleca:nocc,nocc:] = Imat[neleca:nocc,nocc:] / -ee[neleca:nocc,nocc:]

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:,:neleca] - Imat[:neleca,neleca:].T

    def fvind(x):
        """Apply the induced-potential operator to a virt-occ trial vector."""
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2

    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo, max_cycle=30)[0]
    zvec[neleca:,:neleca] = dm1resp

    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:,:neleca], mo_coeff[:,:neleca].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mc._scf.get_veff(mol, zvec_ao), p1))

    # The blocks already absorbed into zvec are zeroed; the virt-occ block is
    # replaced by the transpose of the occ-virt block.
    Imat[:ncore,ncore:neleca] = 0
    Imat[ncore:neleca,:ncore] = 0
    Imat[nocc:,neleca:nocc] = 0
    Imat[neleca:nocc,nocc:] = 0
    Imat[neleca:,:neleca] = Imat[:neleca,neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements in a packed lower-triangular index.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    # Clamp: at least 2 (also covers a negative memory estimate), at most nao.
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        # NOTE: p1 is rebound here to an AO offset; the projector stored in
        # p1 above has already been consumed.
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape((p1-p0)*nf,-1))
                eri1tmp = eri1tmp.reshape(p1-p0,nf,nao,nao)
                # Coulomb- and exchange-type contractions of the HF density
                # with the response density.
                de[k,i] -= numpy.einsum('ijkl,ij,kl', eri1tmp, hf_dm1[p0:p1,q0:q1], zvec_ao) * 2
                de[k,i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1, zvec_ao[p0:p1,q0:q1]) * 2
                de[k,i] += numpy.einsum('ijkl,il,kj', eri1tmp, hf_dm1[p0:p1], zvec_ao[q0:q1])
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, hf_dm1[q0:q1], zvec_ao[p0:p1])

                # The four contractions below presumably stand in for the
                # get_veff-based form kept here for reference:
                #:vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1])
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1])
            eri1 = eri1tmp = None

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], im1[:,p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], zeta[:,p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], vhf_s1occ[:,p0:p1]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de
def kernel(mp, t2, atmlst=None, mf_grad=None, verbose=logger.INFO):
    '''Nuclear gradients built from unrestricted MP2 amplitudes.

    Args:
        mp : MP2 object.  This function reads ``mp._scf``, ``mp.mol``,
            ``mp.mo_coeff``, ``mp.mo_occ``, ``mp.frozen``, ``mp.max_memory``
            and calls ``mp.get_frozen_mask()``.
        t2 : sequence ``(t2aa, t2ab, t2bb)`` of doubles amplitudes.
        atmlst : optional sequence of atom indices.  Defaults to all atoms
            of ``mp.mol``.
        mf_grad : optional mean-field gradients object.  Defaults to
            ``mp._scf.nuc_grad_method()``.
        verbose : verbosity level (or logger) handed to
            ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``; row ``k`` belongs to atom
        ``atmlst[k]``.
    '''
    if mf_grad is None:
        mf_grad = mp._scf.nuc_grad_method()

    log = logger.new_logger(mp, verbose)
    # time.clock() was removed in Python 3.8.  Use the clocks exported by the
    # logger module when they exist (so the start times are measured with the
    # same clocks the timer helpers presumably use) and keep the historical
    # pair only as a fallback.
    if hasattr(logger, 'process_clock'):
        time0 = logger.process_clock(), logger.perf_counter()
    else:
        time0 = time.clock(), time.time()

    log.debug('Build ump2 rdm1 intermediates')
    d1 = ump2._gamma1_intermediates(mp, t2)
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ump2 rdm2 intermediates')

    mol = mp.mol
    # The original test was ``mp.frozen is 0``: an identity comparison with an
    # int literal, which is implementation dependent and a SyntaxWarning on
    # Python >= 3.8.  Compare by value, but only for integer-like values so
    # that list/array ``frozen`` specifications are never compared elementwise.
    frozen = mp.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    moidx = mp.get_frozen_mask()
    OA_a, VA_a, OF_a, VF_a = mp2_grad._index_frozen_active(moidx[0], mp.mo_occ[0])
    OA_b, VA_b, OF_b, VF_b = mp2_grad._index_frozen_active(moidx[1], mp.mo_occ[1])
    orboa = mp.mo_coeff[0][:,OA_a]
    orbva = mp.mo_coeff[0][:,VA_a]
    orbob = mp.mo_coeff[1][:,OA_b]
    orbvb = mp.mo_coeff[1][:,VA_b]
    nao, nocca = orboa.shape
    nvira = orbva.shape[1]
    noccb = orbob.shape[1]
    nvirb = orbvb.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    t2aa, t2ab, t2bb = t2
    part_dm2aa = _ao2mo.nr_e2(t2aa.reshape(nocca**2,nvira**2),
                              numpy.asarray(orbva.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(nocca,nocca,nao,nao)
    part_dm2bb = _ao2mo.nr_e2(t2bb.reshape(noccb**2,nvirb**2),
                              numpy.asarray(orbvb.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(noccb,noccb,nao,nao)
    part_dm2ab = lib.einsum('ijab,pa,qb->ipqj', t2ab, orbva, orbvb)
    part_dm2aa = (part_dm2aa.transpose(0,2,3,1) -
                  part_dm2aa.transpose(0,3,2,1)) * .5
    part_dm2bb = (part_dm2bb.transpose(0,2,3,1) -
                  part_dm2bb.transpose(0,3,2,1)) * .5

    hf_dm1a, hf_dm1b = mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)
    hf_dm1 = hf_dm1a + hf_dm1b

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements inside a packed lower triangle.
    diagidx = numpy.arange(nao)
    diagidx = diagidx*(diagidx+1)//2 + diagidx
    de = numpy.zeros((len(atmlst),3))
    Imata = numpy.zeros((nao,nao))
    Imatb = numpy.zeros((nao,nao))
    # Per-atom HF potential derivatives are parked in a temporary HDF5 file.
    fdm2 = lib.H5TmpFile()
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst),2,3,nao,nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory*.9e6/8/(nao**3*2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((2,3,nao,nao))
        for b0, b1, nf in mp2_grad._shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2bufa = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2aa)
            dm2bufa+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2aa[:,ip0:ip1])
            dm2bufa = lib.einsum('pqrj,sj->pqrs', dm2bufa, orboa)
            tmp = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2ab)
            tmp+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2ab[:,ip0:ip1])
            dm2bufa+= lib.einsum('pqrj,sj->pqrs', tmp, orbob)
            tmp = None
            dm2bufa = dm2bufa + dm2bufa.transpose(0,1,3,2)
            dm2bufa = lib.pack_tril(dm2bufa.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            dm2bufa[:,:,diagidx] *= .5

            dm2bufb = lib.einsum('pi,iqrj->pqrj', orbob[ip0:ip1], part_dm2bb)
            dm2bufb+= lib.einsum('qi,iprj->pqrj', orbob, part_dm2bb[:,ip0:ip1])
            dm2bufb = lib.einsum('pqrj,sj->pqrs', dm2bufb, orbob)
            tmp = lib.einsum('iqrj,sj->iqrs', part_dm2ab, orbob[ip0:ip1])
            tmp+= lib.einsum('iqrj,sj->iqsr', part_dm2ab[:,:,ip0:ip1], orbob)
            dm2bufb+= lib.einsum('pi,iqrs->srpq', orboa, tmp)
            tmp = None
            dm2bufb = dm2bufb + dm2bufb.transpose(0,1,3,2)
            dm2bufb = lib.pack_tril(dm2bufb.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            dm2bufb[:,:,diagidx] *= .5

            shls_slice = (b0,b1,0,mol.nbas,0,mol.nbas,0,mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imata += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufa)
            Imatb += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufb)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,nf,nao,-1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufa) * 2
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufb) * 2
            dm2bufa = dm2bufb = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf*nao,-1))
                eri1tmp = eri1tmp.reshape(nf,nao,nao,nao)
                vhf[:,i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[0,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1a[ip0:ip1])
                vhf[1,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1b[ip0:ip1])
                vhf[:,i,ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[0,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1a)
                vhf[1,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1b)
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in uccsd_grad module.
    mo_a, mo_b = mp.mo_coeff
    mo_ea, mo_eb = mp._scf.mo_energy
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nocca = numpy.count_nonzero(mp.mo_occ[0] > 0)
    noccb = numpy.count_nonzero(mp.mo_occ[1] > 0)
    s0 = mp._scf.get_ovlp()
    Imata = reduce(numpy.dot, (mo_a.T, Imata, s0, mo_a)) * -1
    Imatb = reduce(numpy.dot, (mo_b.T, Imatb, s0, mo_b)) * -1

    dm1a = numpy.zeros((nmoa,nmoa))
    dm1b = numpy.zeros((nmob,nmob))
    doo, dOO = d1[0]
    dvv, dVV = d1[1]
    if with_frozen:
        dco = Imata[OF_a[:,None],OA_a] / (mo_ea[OF_a,None] - mo_ea[OA_a])
        dfv = Imata[VF_a[:,None],VA_a] / (mo_ea[VF_a,None] - mo_ea[VA_a])
        dm1a[OA_a[:,None],OA_a] = (doo + doo.T) * .5
        dm1a[OF_a[:,None],OA_a] = dco
        dm1a[OA_a[:,None],OF_a] = dco.T
        dm1a[VA_a[:,None],VA_a] = (dvv + dvv.T) * .5
        dm1a[VF_a[:,None],VA_a] = dfv
        dm1a[VA_a[:,None],VF_a] = dfv.T
        dco = Imatb[OF_b[:,None],OA_b] / (mo_eb[OF_b,None] - mo_eb[OA_b])
        dfv = Imatb[VF_b[:,None],VA_b] / (mo_eb[VF_b,None] - mo_eb[VA_b])
        dm1b[OA_b[:,None],OA_b] = (dOO + dOO.T) * .5
        dm1b[OF_b[:,None],OA_b] = dco
        dm1b[OA_b[:,None],OF_b] = dco.T
        dm1b[VA_b[:,None],VA_b] = (dVV + dVV.T) * .5
        dm1b[VF_b[:,None],VA_b] = dfv
        dm1b[VA_b[:,None],VF_b] = dfv.T
    else:
        dm1a[:nocca,:nocca] = (doo + doo.T) * .5
        dm1a[nocca:,nocca:] = (dvv + dvv.T) * .5
        dm1b[:noccb,:noccb] = (dOO + dOO.T) * .5
        dm1b[noccb:,noccb:] = (dVV + dVV.T) * .5

    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf = mp._scf.get_veff(mp.mol, dm1)
    Xvo = reduce(numpy.dot, (mo_a[:,nocca:].T, vhf[0], mo_a[:,:nocca]))
    XVO = reduce(numpy.dot, (mo_b[:,noccb:].T, vhf[1], mo_b[:,:noccb]))
    Xvo+= Imata[:nocca,nocca:].T - Imata[nocca:,:nocca]
    XVO+= Imatb[:noccb,noccb:].T - Imatb[noccb:,:noccb]

    dm1_resp = _response_dm1(mp, (Xvo,XVO))
    dm1a += dm1_resp[0]
    dm1b += dm1_resp[1]
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imata[nocca:,:nocca] = Imata[:nocca,nocca:].T
    Imatb[noccb:,:noccb] = Imatb[:noccb,noccb:].T
    im1 = reduce(numpy.dot, (mo_a, Imata, mo_a.T))
    im1+= reduce(numpy.dot, (mo_b, Imatb, mo_b.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = (mo_ea[:,None] + mo_ea) * .5
    zeta[nocca:,:nocca] = mo_ea[:nocca]
    zeta[:nocca,nocca:] = mo_ea[:nocca].reshape(-1,1)
    zeta_a = reduce(numpy.dot, (mo_a, zeta*dm1a, mo_a.T))
    zeta = (mo_eb[:,None] + mo_eb) * .5
    zeta[noccb:,:noccb] = mo_eb[:noccb]
    zeta[:noccb,noccb:] = mo_eb[:noccb].reshape(-1,1)
    zeta_b = reduce(numpy.dot, (mo_b, zeta*dm1b, mo_b.T))

    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf_s1occ = mp._scf.get_veff(mol, (dm1[0]+dm1[0].T, dm1[1]+dm1[1].T))
    p1a = numpy.dot(mo_a[:,:nocca], mo_a[:,:nocca].T)
    p1b = numpy.dot(mo_b[:,:noccb], mo_b[:,:noccb].T)
    vhf_s1occ = (reduce(numpy.dot, (p1a, vhf_s1occ[0], p1a)) +
                 reduce(numpy.dot, (p1b, vhf_s1occ[1], p1b))) * .5
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1pa = hf_dm1a + dm1[0]*2
    dm1pb = hf_dm1b + dm1[1]*2
    dm1 = dm1[0] + dm1[1] + hf_dm1
    zeta_a += rhf_grad.make_rdm1e(mo_ea, mo_a, mp.mo_occ[0])
    zeta_b += rhf_grad.make_rdm1e(mo_eb, mo_b, mp.mo_occ[1])
    zeta = zeta_a + zeta_b

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM, contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
        de[k] -= numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1])
        # -vhf[s_ij[1]], contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,0], dm1pa)
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,1], dm1pb)

    # ``de`` only has rows for the atoms in ``atmlst``; request the nuclear
    # repulsion gradient for the same atoms so the shapes agree when a subset
    # of atoms is selected.
    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mp.__class__.__name__, *time0)
    return de
def Lci_dot_dgci_dx(Lci, weights, mc, mo_coeff=None, ci=None, atmlst=None,
                    mf_grad=None, eris=None, verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the CI
    Lagrange term nuclear gradient (sum_IJ Lci_IJ d2_Ecas/d_lambda d_PIJ)
    This involves removing all core-core and nuclear-nuclear terms and making the substitution
    sum_I w_I<L_I|p'q|I> + c.c. -> <0|p'q|0>
    sum_I w_I<L_I|p'r'sq|I> + c.c. -> <0|p'r'sq|0>
    The active-core terms (sum_I w_I<L_I|x'iyi|I>, sum_I w_I <L_I|x'iiy|I>, c.c.) must be retained.

    Args:
        Lci : CI Lagrange multipliers, passed as the bra of
            ``mc.fcisolver.trans_rdm12``.
        weights : not referenced in this implementation; kept for interface
            compatibility.
        mc : CASSCF-like object (``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``_scf``, ``max_memory`` and ``frozen`` are read).
        mo_coeff : MO coefficients; defaults to ``mc.mo_coeff``.
        ci : CI vector(s); defaults to ``mc.ci``.
        atmlst : atom indices to differentiate; defaults to all atoms.
        mf_grad : mean-field gradients object; defaults to
            ``mc._scf.nuc_grad_method()``.
        eris : object exposing ``ppaa``.  When omitted it is built with
            ``mc.ao2mo(mo_coeff)``.
        verbose : not referenced in this implementation.

    Returns:
        Array of shape ``(len(atmlst), 3)``: sum of the hcore, renormalization
        and two-electron components.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # ``eris`` defaults to None in the signature but was dereferenced
    # unconditionally below (AttributeError on ``None.ppaa``).  Build it on
    # demand instead.  NOTE(review): assumes mc.ao2mo returns an object with
    # a ``ppaa`` attribute, as the code below requires.
    if eris is None:
        eris = mc.ao2mo(mo_coeff)

    t0 = (logger.process_clock(), logger.perf_counter())
    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: TDMs + c.c. instead of RDMs; 06/30/2020: new interface in
    # mcscf.addons makes this much more transparent
    casdm1, casdm2 = mc.fcisolver.trans_rdm12(Lci, ci, ncas, nelecas)
    casdm1 += casdm1.transpose(1, 0)
    casdm2 += casdm2.transpose(1, 0, 3, 2)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        aapa[:, :, i, :] = eris.ppaa[i][ncore:nocc, :, :].transpose(1, 2, 0)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # MRH: delete h1 + vhf_c from the first line below (core and core-core stuff)
    # Also extend gfock to span the whole space
    gfock = np.zeros_like(dm_cas)
    gfock[:, :nocc] = reduce(np.dot, (mo_coeff.T, vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(np.dot, (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += np.einsum('uvpw,vuwt->pt', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    # Drop the large intermediates before the integral-derivative loop.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas))
    vhf1c, vhf1a = vj - vk * 0.5
    #vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements inside a packed lower triangle.
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays
    # of this size are required!
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lci_dot_dgci memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lci_dot_dgci 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: dm1 -> dm_cas in the line below
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm_cas)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            gc.collect()
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lci_dot_dgci atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: dm1 -> dm_cas in the line below. Also eliminate core-core terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm_cas[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1], dm_core[p0:p1]) * 2

    logger.debug(mc, "CI lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "CI lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "CI lagrange eri component:\n{}".format(de_eri))

    de = de_hcore + de_renorm + de_eri
    return de
def Lorb_dot_dgorb_dx(Lorb, mc, mo_coeff=None, ci=None, atmlst=None,
                      mf_grad=None, eris=None, verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the orbital
    Lagrange term nuclear gradient (sum_pq Lorb_pq d2_Ecas/d_lambda d_kpq)
    This involves removing nuclear-nuclear terms and making the substitution
    (D_[p]q + D_p[q]) -> D_pq
    (d_[p]qrs + d_pq[r]s + d_p[q]rs + d_pqr[s]) -> d_pqrs
    Where [] around an index implies contraction with Lorb from the left, so that the external index
    (regardless of whether the index on the rdm is bra or ket) is always the first index of Lorb.

    Args:
        Lorb : orbital Lagrange multipliers as a matrix in the MO basis
            (it is right-multiplied onto ``mo_coeff``).
        mc : CASSCF-like object (``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``_scf``, ``max_memory`` and ``frozen`` are read).
        mo_coeff : MO coefficients; defaults to ``mc.mo_coeff``.
        ci : CI vector(s); defaults to ``mc.ci``.
        atmlst : atom indices to differentiate; defaults to all atoms.
        mf_grad : mean-field gradients object; defaults to
            ``mc._scf.nuc_grad_method()``.
        eris : object exposing ``ppaa`` and ``papa``.  When omitted it is
            built with ``mc.ao2mo(mo_coeff)``.
        verbose : not referenced in this implementation.

    Returns:
        Array of shape ``(len(atmlst), 3)``: sum of the hcore, renormalization
        and two-electron components.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''

    # dmo = smoT.dao.smo
    # dao = mo.dmo.moT
    t0 = (logger.process_clock(), logger.perf_counter())
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # ``eris`` defaults to None in the signature but was dereferenced
    # unconditionally below (AttributeError on ``None.ppaa``).  Build it on
    # demand instead.  NOTE(review): assumes mc.ao2mo returns an object with
    # ``ppaa`` and ``papa`` attributes, as the code below requires.
    if eris is None:
        eris = mc.ao2mo(mo_coeff)

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: new 'effective' MO coefficients including contraction from the Lagrange multipliers
    moL_coeff = np.dot(mo_coeff, Lorb)
    s0_inv = np.dot(mo_coeff, mo_coeff.T)
    moL_core = moL_coeff[:, :ncore]
    moL_cas = moL_coeff[:, ncore:nocc]

    # MRH: these SHOULD be state-averaged! Use the actual sacasscf object!
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    # MRH: each index exactly once!
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: new density matrix terms
    dmL_core = np.dot(moL_core, mo_core.T) * 2
    dmL_cas = reduce(np.dot, (moL_cas, casdm1, mo_cas.T))
    dmL_core += dmL_core.T
    dmL_cas += dmL_cas.T
    dm1 = dm_core + dm_cas
    dm1L = dmL_core + dmL_cas
    # MRH: end new density matrix terms
    # MRH: wrap the integral instead of the density matrix. I THINK the sign is the same!
    # mo sets 0 and 2 should be transposed, 1 and 3 should be not transposed; this will lead to correct sign
    # Except I can't do this for the external index, because the external index is contracted to ovlp matrix,
    # not the 2RDM
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    aapaL = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        jbuf = eris.ppaa[i]
        kbuf = eris.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :].transpose(1, 2, 0)
        aapaL[:, :, i, :] += np.tensordot(jbuf, Lorb[:, ncore:nocc],
                                          axes=((0), (0)))
        kbuf = np.tensordot(kbuf, Lorb[:, ncore:nocc],
                            axes=((1), (0))).transpose(1, 2, 0)
        aapaL[:, :, i, :] += kbuf + kbuf.transpose(1, 0, 2)
    # MRH: new vhf terms
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    vjL, vkL = mc._scf.get_jk(mol, (dmL_core, dmL_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    vhfL_c = vjL[0] - vkL[0] * .5
    vhfL_a = vjL[1] - vkL[1] * .5
    # MRH: I rewrote this Feff calculation completely, double-check it
    gfock = np.dot(h1, dm1L)  # h1e
    gfock += np.dot((vhf_c + vhf_a), dmL_core)  # core-core and active-core, 2nd 1RDM linked
    gfock += np.dot((vhfL_c + vhfL_a), dm_core)  # core-core and active-core, 1st 1RDM linked
    gfock += np.dot(vhfL_c, dm_cas)  # core-active, 1st 1RDM linked
    gfock += np.dot(vhf_c, dmL_cas)  # core-active, 2nd 1RDM linked
    # Definition of quantity is in MO's; going (AO->MO->AO) incurs an inverse ovlp
    gfock = np.dot(s0_inv, gfock)
    gfock += reduce(np.dot, (mo_coeff,
                             np.einsum('uviw,uvtw->it', aapaL, casdm2),
                             mo_cas.T))  # active-active
    # MRH: I have to contract this external 2RDM index explicitly on the 2RDM but fortunately I can do so here
    gfock += reduce(np.dot, (mo_coeff,
                             np.einsum('uviw,vuwt->it', aapa, casdm2),
                             moL_cas.T))
    # MRH: As of 04/18/2019, the two-body part of this is including aapaL is definitely, unambiguously correct
    dme0 = (gfock + gfock.T) / 2  # This transpose is for the overlap matrix later on
    aapa = vj = vk = vhf_c = vhf_a = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    vhf1c, vhf1a, vhf1cL, vhf1aL = vj - vk * 0.5
    #vhf1c, vhf1a, vhf1cL, vhf1aL = mf_grad.get_veff(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements inside a packed lower triangle.
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # MRH: contract the final two indices of the active-active 2RDM with L as you change to AOs
    # note tensordot always puts indices in the order of the arguments.
    dm2Lbuf = np.zeros((ncas**2, nmo, nmo))
    # MRH: The second line below transposes the L; the third line transposes the derivative later on
    # Both the L and the derivative have to explore all indices
    dm2Lbuf[:, :, ncore:nocc] = np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 2)).transpose(1, 2, 0, 3).reshape(ncas**2, nmo, ncas)
    dm2Lbuf[:, ncore:nocc, :] += np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 3)).transpose(1, 2, 3, 0).reshape(ncas**2, ncas, nmo)
    dm2Lbuf += dm2Lbuf.transpose(0, 2, 1)
    dm2Lbuf = np.ascontiguousarray(dm2Lbuf)
    dm2Lbuf = ao2mo._ao2mo.nr_e2(dm2Lbuf.reshape(ncas**2, nmo**2), mo_coeff.T,
                                 (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    dm2Lbuf = lib.pack_tril(dm2Lbuf)
    dm2Lbuf[:, diag_idx] *= .5
    dm2Lbuf = dm2Lbuf.reshape(ncas, ncas, nao_pair)

    if atmlst is None:
        atmlst = list(range(mol.natm))
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays
    # of this size are required!
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lorb_dot_dgorb memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lorb_dot_dgorb 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: h1e and Feff terms
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1L)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2Lbuf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            # MRH: now contract the first two indices of the active-active 2RDM with L as you go from MOs to AOs
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, moL_cas[p0:p1],
                                 mo_cas[q0:q1])
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                 moL_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            gc.collect()
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            # MRH: I still don't understand why there is a minus here!
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lorb_dot_dgorb atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: core-core and core-active 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1L[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1cL[:, p0:p1], dm1[p0:p1]) * 2
        # MRH: active-core 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1], dmL_core[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1aL[:, p0:p1], dm_core[p0:p1]) * 2

    # MRH: deleted the nuclear-nuclear part to avoid double-counting
    # lesson learned from debugging - mol.intor computes -1 * the derivative and only
    # for one index
    # on the other hand, mf_grad.hcore_generator computes the actual derivative of
    # h1 for both indices and with the correct sign

    logger.debug(mc, "Orb lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "Orb lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "Orb lagrange eri component:\n{}".format(de_eri))

    de = de_hcore + de_renorm + de_eri
    return de
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Brute-force nuclear gradient for a CAS wavefunction on mean-field orbitals.

    The first half contracts integral derivatives with the 1- and 2-particle
    density matrices; the second half adds an orbital-response contribution
    obtained by solving a linear system with an explicitly built Hessian for
    every atom and Cartesian component.  Full 4-index arrays of dimension
    ``nmo**4`` / ``nao**4`` are held in memory.

    Args:
        mc : CAS object (``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``_scf`` and ``max_memory`` are read).
        mo_coeff : MO coefficients; defaults to ``mc._scf.mo_coeff``.
        ci : CI vector used to build the active-space density matrices;
            defaults to ``mc.ci``.
        atmlst : accepted for interface compatibility but overwritten below;
            the gradient is always computed for every atom of ``mc.mol``.
        mf_grad : mean-field gradients object; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose : not referenced in this implementation.

    Returns:
        Array of shape ``(mol.natm, 3)``.
    '''
    if mo_coeff is None:
        mo_coeff = mc._scf.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    mo_energy = mc._scf.mo_energy

    hcore_deriv = mf_grad.hcore_generator(mol)
    # NOTE: this s1 is only referenced by the commented-out dme0 term below
    # and is rebound from raw integrals further down.
    s1 = mf_grad.get_ovlp(mol)

    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]

    # Use the resolved ``ci`` argument.  The original passed ``mc.ci`` here,
    # which silently ignored a CI vector supplied by the caller.
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nocc,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    gfock[:,ncore:nocc] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:,ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    # NOTE: dme0 is not consumed below; its only use is commented out in the
    # atom loop.
    dme0 = reduce(numpy.dot, (mo_occ, (gfock+gfock.T)*.5, mo_occ.T))
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))

    # Positions of the diagonal elements inside a packed lower triangle.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    #casdm2 = casdm2_cc = None

    # The response loop further down always runs over every atom, so the
    # caller-supplied atmlst is deliberately replaced here.
    atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        #de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2

    # Full MO-basis 2-particle density matrix: core-core, core-active and
    # active-active blocks.
    dm2 = numpy.zeros((nmo,nmo,nmo,nmo))
    for i in range(ncore):
        for j in range(ncore):
            dm2[i,i,j,j] += 4
            dm2[i,j,j,i] -= 2
        dm2[i,i,ncore:nocc,ncore:nocc] = casdm1 * 2
        dm2[ncore:nocc,ncore:nocc,i,i] = casdm1 * 2
        dm2[i,ncore:nocc,ncore:nocc,i] =-casdm1
        dm2[ncore:nocc,i,i,ncore:nocc] =-casdm1
    dm2[ncore:nocc,ncore:nocc,ncore:nocc,ncore:nocc] = casdm2
    # NOTE: eri0 / Imat are computed but not referenced afterwards.
    eri0 = ao2mo.restore(1, ao2mo.full(mc._scf._eri, mo_coeff), nmo)
    Imat = numpy.einsum('pjkl,qjkl->pq', eri0, dm2)
    # dm1 is rebound here to the MO-basis 1-particle density matrix.
    dm1 = numpy.zeros((nmo,nmo))
    for i in range(ncore):
        dm1[i,i] = 2
    dm1[ncore:nocc,ncore:nocc] = casdm1

    neleca, nelecb = mol.nelec

    h1 =-(mol.intor('int1e_ipkin', comp=3)
         +mol.intor('int1e_ipnuc', comp=3))
    s1 =-mol.intor('int1e_ipovlp', comp=3)
    # Derivative ERIs: the first index stays in the AO basis, the other three
    # are transformed to MOs.
    eri1 = mol.intor('int2e_ip1', comp=3).reshape(3,nao,nao,nao,nao)
    eri1 = numpy.einsum('xipkl,pj->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijpl,pk->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijkp,pl->xijkl', eri1, mo_coeff)
    h0 = reduce(numpy.dot, (mo_coeff.T, mc._scf.get_hcore(), mo_coeff))
    g0 = ao2mo.restore(1, ao2mo.full(mol, mo_coeff), nmo)

    def hess():
        # Dense (nocc*nvir) x (nocc*nvir) matrix for the linear solve below;
        # nocc here is the closed-shell occupation from mol.nelectron, which
        # shadows the core+active count of the enclosing scope.
        nocc = mol.nelectron//2
        nvir = nmo - nocc
        eri_mo = g0
        eai = lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
        h = eri_mo[nocc:,:nocc,nocc:,:nocc] * 4
        h-= numpy.einsum('cdlk->ckdl', eri_mo[nocc:,nocc:,:nocc,:nocc])
        h-= numpy.einsum('cldk->ckdl', eri_mo[nocc:,:nocc,nocc:,:nocc])
        for a in range(nvir):
            for i in range(nocc):
                h[a,i,a,i] += eai[a,i]
        return -h.reshape(nocc*nvir,-1)
    hh = hess()
    ee = mo_energy[:,None] - mo_energy

    for k,(sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()):
        mol.set_rinv_origin(mol.atom_coord(k))
        vrinv = -mol.atom_charge(k) * mol.intor('int1e_iprinv', comp=3)

        # 2e AO integrals dot 2pdm
        for i in range(3):
            g1 = numpy.einsum('pjkl,pi->ijkl', eri1[i,p0:p1], mo_coeff[p0:p1])
            g1 = g1 + g1.transpose(1,0,2,3)
            g1 = g1 + g1.transpose(2,3,0,1)
            g1 *= -1
            hx =(numpy.einsum('pq,pi,qj->ij', h1[i,p0:p1], mo_coeff[p0:p1], mo_coeff)
               + reduce(numpy.dot, (mo_coeff.T, vrinv[i], mo_coeff)))
            hx = hx + hx.T
            sx = numpy.einsum('pq,pi,qj->ij', s1[i,p0:p1], mo_coeff[p0:p1], mo_coeff)
            sx = sx + sx.T

            fij =(hx[:neleca,:neleca]
                  - numpy.einsum('ij,j->ij', sx[:neleca,:neleca], mo_energy[:neleca])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[:neleca,:neleca,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[:neleca,:neleca,:neleca,:neleca])
                  + numpy.einsum('ijkk->ij', g1[:neleca,:neleca,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[:neleca,:neleca,:neleca,:neleca]))

            fab =(hx[neleca:,neleca:]
                  - numpy.einsum('ij,j->ij', sx[neleca:,neleca:], mo_energy[neleca:])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[neleca:,neleca:,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,neleca:])
                  + numpy.einsum('ijkk->ij', g1[neleca:,neleca:,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[neleca:,:neleca,:neleca,neleca:]))

            fai =(hx[neleca:,:neleca]
                  - numpy.einsum('ai,i->ai', sx[neleca:,:neleca], mo_energy[:neleca])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,:neleca])
                  + numpy.einsum('ijkk->ij', g1[neleca:,:neleca,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[neleca:,:neleca,:neleca,:neleca]))
            # First-order orbital coefficients c1, filled block by block.
            c1 = numpy.zeros((nmo,nmo))
            c1[:neleca,:neleca] = -.5 * sx[:neleca,:neleca]
            c1[neleca:,neleca:] = -.5 * sx[neleca:,neleca:]
            cvo1 = numpy.linalg.solve(hh, fai.ravel()).reshape(-1,neleca)
            cov1 = -(sx[neleca:,:neleca] + cvo1).T
            c1[neleca:,:neleca] = cvo1
            c1[:neleca,neleca:] = cov1
            v1 = numpy.einsum('pqai,ai->pq', g0[:,:,neleca:,:neleca], cvo1) * 4
            v1-= numpy.einsum('paiq,ai->pq', g0[:,neleca:,:neleca,:], cvo1)
            v1-= numpy.einsum('piaq,ai->pq', g0[:,:neleca,neleca:,:], cvo1)
            fij += v1[:neleca,:neleca]
            fab += v1[neleca:,neleca:]
            c1[:ncore,ncore:neleca] = -fij[:ncore,ncore:] / ee[:ncore,ncore:neleca]
            c1[ncore:neleca,:ncore] = -fij[ncore:,:ncore] / ee[ncore:neleca,:ncore]
            m = nocc - neleca
            c1[nocc:,neleca:nocc] = -fab[m:,:m] / ee[nocc:,neleca:nocc]
            c1[neleca:nocc,nocc:] = -fab[:m,m:] / ee[neleca:nocc,nocc:]
            h0c1 = h0.dot(c1)
            h0c1 = h0c1 + h0c1.T
            g0c1 = numpy.einsum('pjkl,pi->ijkl', g0, c1)
            g0c1 = g0c1 + g0c1.transpose(1,0,2,3)
            g0c1 = g0c1 + g0c1.transpose(2,3,0,1)

            de[k,i] += numpy.einsum('ij,ji', h0c1, dm1)
            de[k,i] += numpy.einsum('ijkl,jilk', g0c1, dm2)*.5

    de += rhf_grad.grad_nuc(mol)
    return de
def Lci_dot_dgci_dx(Lci, weights, mc, mo_coeff=None, ci=None, atmlst=None,
                    mf_grad=None, verbose=None):
    '''CI Lagrange-term contribution to the nuclear gradient.

    Adapted from pyscf.grad.casscf.kernel
    (sum_IJ Lci_IJ d2_Ecas/d_lambda d_PIJ).  Core-core and nuclear-nuclear
    terms are dropped and the density matrices are replaced by weighted,
    symmetrized transition density matrices:
        sum_I w_I<L_I|p'q|I> + c.c. -> <0|p'q|0>
        sum_I w_I<L_I|p'r'sq|I> + c.c. -> <0|p'r'sq|0>
    The active-core terms (sum_I w_I<L_I|x'iyi|I>, sum_I w_I <L_I|x'iiy|I>,
    c.c.) are retained.

    Args:
        Lci : per-root CI Lagrange multipliers (indexed by root).
        weights : per-root weights; indexed as an array (``weights[:, None, None]``).
        mc : CASSCF-like object.
        mo_coeff : MO coefficients; defaults to ``mc.mo_coeff``.
        ci : per-root CI vectors with a ``shape`` attribute; defaults to ``mc.ci``.
        atmlst : atom indices; defaults to all atoms of ``mc.mol``.
        mf_grad : mean-field gradients object; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose : not referenced.

    Returns:
        Array of shape ``(len(atmlst), 3)``.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nelecas = mc.nelecas
    nocc = ncore + ncas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    nroots = ci.shape[0]
    act = slice(ncore, nocc)  # active-orbital window

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, act]

    # MRH: TDMs + c.c. instead of RDMs
    tdm1 = np.zeros((nroots, ncas, ncas))
    tdm2 = np.zeros((nroots, ncas, ncas, ncas, ncas))
    for root in range(nroots):
        tdm1[root], tdm2[root] = mc.fcisolver.trans_rdm12(
            Lci[root], ci[root], ncas, nelecas)
    casdm1 = (tdm1 * weights[:, None, None]).sum(0)
    casdm2 = (tdm2 * weights[:, None, None, None, None]).sum(0)
    casdm1 += casdm1.transpose(1, 0)
    casdm2 += casdm2.transpose(1, 0, 3, 2)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas),
                        compact=False).reshape(ncas, ncas, nmo, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # MRH: h1 + vhf_c is deleted from the all-occupied block (core and
    # core-core stuff); gfock is extended to span the whole space
    gfock = np.zeros_like(dm_cas)
    gfock[:, :nocc] = reduce(np.dot, (mo_coeff.T, vhf_a, mo_occ)) * 2
    gfock[:, act] = reduce(np.dot, (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, act] += np.einsum('uvpw,vuwt->pt', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    # Release the large intermediates.
    del aapa, vj, vk, vhf_c, vhf_a, h1, gfock

    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Active 2-TDM, half-transformed to packed AO pairs with halved diagonal.
    tril_diag = np.arange(nao)
    tril_diag = tril_diag * (tril_diag + 1) // 2 + tril_diag
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, tril_diag] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    del casdm2, casdm2_cc

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    grad_shape = (len(atmlst), 3)
    de_hcore = np.zeros(grad_shape)
    de_renorm = np.zeros(grad_shape)
    de_eri = np.zeros(grad_shape)

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for iatm, atom_id in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[atom_id]
        h1ao = hcore_deriv(atom_id)
        # MRH: dm1 -> dm_cas
        de_hcore[iatm] += np.einsum('xij,ij->x', h1ao, dm_cas)
        de_renorm[iatm] -= np.einsum('xij,ij->x', s1[:, p0:p1],
                                     dme0[p0:p1]) * 2

        q0 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q1 = q0 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            eri1 = mol.intor(
                'int2e_ip1', comp=3, aosym='s2kl',
                shls_slice=(shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas))
            eri1 = eri1.reshape(3, p1 - p0, nf, nao_pair)
            de_eri[iatm] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
            q0 = q1
        # MRH: dm1 -> dm_cas. Also eliminate core-core terms
        de_eri[iatm] += np.einsum('xij,ij->x', vhf1c[:, p0:p1],
                                  dm_cas[p0:p1]) * 2
        de_eri[iatm] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                                  dm_core[p0:p1]) * 2

    lib.logger.debug(mc, "CI lagrange hcore component:\n{}".format(de_hcore))
    lib.logger.debug(mc, "CI lagrange renorm component:\n{}".format(de_renorm))
    lib.logger.debug(mc, "CI lagrange eri component:\n{}".format(de_eri))

    return de_hcore + de_renorm + de_eri
def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           mf_grad=None, d1=None, d2=None, verbose=logger.INFO):
    '''Nuclear gradients for a CCSD object built on a canonical HF reference.

    Args:
        mycc : CCSD object.  Its ``t1``, ``t2``, ``l1`` and ``l2`` attributes
            are used for every amplitude that is not passed explicitly.
        t1, t2, l1, l2 : amplitudes forwarded to the ``ccsd_rdm`` intermediate
            builders.
        eris : optional integral object.  When given, its ``fock`` matrix must
            be diagonal to within 1e-3; it is also forwarded to
            ``_response_dm1``.
        atmlst : atoms to differentiate with respect to.  Defaults to
            ``range(mol.natm)``.
        mf_grad : mean-field gradient object.  Defaults to
            ``mycc._scf.nuc_grad_method()``.
        d1 : precomputed ``(doo, dov, dvo, dvv)`` intermediates; built with
            ``ccsd_rdm._gamma1_intermediates`` when omitted.
        d2 : precomputed 2-RDM intermediates; built with
            ``ccsd_rdm._gamma2_outcore`` when omitted.
        verbose : verbosity passed to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``.  The result of
        ``mf_grad.grad_nuc(mol, atmlst)`` is already added.

    Raises:
        RuntimeError: ``eris.fock`` has an off-diagonal element larger than
            1e-3 in magnitude.
    '''
    if eris is not None:
        if abs(eris.fock - numpy.diag(eris.fock.diagonal())).max() > 1e-3:
            raise RuntimeError('CCSD gradients does not support NHF (non-canonical HF)')

    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if l1 is None: l1 = mycc.l1
    if l2 is None: l2 = mycc.l2
    if mf_grad is None: mf_grad = mycc._scf.nuc_grad_method()

    log = logger.new_logger(mycc, verbose)
    # time.clock() no longer exists on Python >= 3.8.  Use the logger's own
    # clock pair so the values handed back to log.timer* stay comparable.
    time0 = logger.process_clock(), logger.perf_counter()

    log.debug('Build ccsd rdm1 intermediates')
    if d1 is None:
        d1 = ccsd_rdm._gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ccsd rdm2 intermediates')
    # Temporary HDF5 file: holds the rdm2 intermediates, the AO-basis 'dm2'
    # written by _rdm2_mo2ao, and the 'vhf1' dataset created below.
    fdm2 = lib.H5TmpFile()
    if d2 is None:
        d2 = ccsd_rdm._gamma2_outcore(mycc, t1, t2, l1, l2, fdm2, True)
    time1 = log.timer_debug1('rdm2 intermediates', *time1)

    mol = mycc.mol
    mo_coeff = mycc.mo_coeff
    mo_energy = mycc._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mycc.mo_occ > 0)
    # "No frozen orbitals" is spelled either None or the integer 0.  Compare
    # by value: `frozen is 0` depends on CPython's small-int cache.
    frozen = mycc.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mycc.get_frozen_mask(), mycc.mo_occ)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    # Roughly, dm2*2 is computed in _rdm2_mo2ao
    mo_active = mo_coeff[:,numpy.hstack((OA,VA))]
    _rdm2_mo2ao(mycc, d2, mo_active, fdm2)  # transform the active orbitals
    time1 = log.timer_debug1('MO->AO transformation', *time1)
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal (i == j) elements in lower-triangular packing
    diagidx = numpy.arange(nao)
    diagidx = diagidx*(diagidx+1)//2 + diagidx
    de = numpy.zeros((len(atmlst),3))
    Imat = numpy.zeros((nao,nao))
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst),3,nao,nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory*.9e6/8/(nao**3*2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((3,nao,nao))
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = _load_block_tril(fdm2['dm2'], ip0, ip1, nao)
            dm2buf[:,:,diagidx] *= .5
            shls_slice = (b0,b1,0,mol.nbas,0,mol.nbas,0,mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2buf)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,nf,nao,-1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            dm2buf = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf*nao,-1))
                eri1tmp = eri1tmp.reshape(nf,nao,nao,nao)
                vhf[i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1[ip0:ip1]) * .5
                vhf[i,ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1) * .5
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    Imat = reduce(numpy.dot, (mo_coeff.T, Imat, mycc._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo,nmo))
    if with_frozen:
        dco = Imat[OF[:,None],OA] / (mo_energy[OF,None] - mo_energy[OA])
        dfv = Imat[VF[:,None],VA] / (mo_energy[VF,None] - mo_energy[VA])
        dm1mo[OA[:,None],OA] = doo + doo.T
        dm1mo[OF[:,None],OA] = dco
        dm1mo[OA[:,None],OF] = dco.T
        dm1mo[VA[:,None],VA] = dvv + dvv.T
        dm1mo[VF[:,None],VA] = dfv
        dm1mo[VA[:,None],VF] = dfv.T
    else:
        dm1mo[:nocc,:nocc] = doo + doo.T
        dm1mo[nocc:,nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mycc._scf.get_veff(mycc.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:,nocc:].T, vhf, mo_coeff[:,:nocc]))
    Xvo += Imat[:nocc,nocc:].T - Imat[nocc:,:nocc]

    dm1mo += _response_dm1(mycc, Xvo, eris)
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imat[nocc:,:nocc] = Imat[:nocc,nocc:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:,:nocc] = mo_energy[:nocc]
    zeta[:nocc,nocc:] = mo_energy[:nocc].reshape(-1,1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta*dm1mo, mo_coeff.T))

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # C_occ C_occ^T; named apart from the AO offset `p1` used in the loop below
    occ_proj = numpy.dot(mo_coeff[:,:nocc], mo_coeff[:,:nocc].T)
    vhf_s1occ = reduce(numpy.dot, (occ_proj, mycc._scf.get_veff(mol, dm1+dm1.T), occ_proj))
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1p = hf_dm1 + dm1*2
    dm1 += hf_dm1
    zeta += mf_grad.make_rdm1e(mo_energy, mo_coeff, mycc.mo_occ)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
        de[k] -= numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k], dm1p)

    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mycc.__class__.__name__, *time0)
    return de
def Lorb_dot_dgorb_dx(Lorb, mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, eris=None,
                      verbose=None):
    ''' Modification of single-state CASSCF electronic energy nuclear gradient to compute instead
    the orbital Lagrange term nuclear gradient:

    sum_pq Lorb_pq d2_Ecas/d_lambda d_kpq

    This involves the effective density matrices
    ~D_pq = L_pr*D_rq + L_qr*D_pr
    ~d_pqrs = L_pt*d_tqrs + L_rt*d_pqts + L_qt*d_ptrs + L_st*d_pqrt
    (NB: L_pq = -L_qp)

    Args:
        Lorb : orbital Lagrange multipliers in the MO basis; contracted with
            ``mo_coeff`` as ``np.dot(mo_coeff, Lorb)``.
        mc : CASSCF-type object providing ``mol``, ``ncore``, ``ncas``,
            ``nelecas``, ``fcisolver``, ``_scf``, ``get_hcore`` and
            ``max_memory``.  ``mc.frozen`` must be None.
        mo_coeff : MO coefficients.  Defaults to ``mc.mo_coeff``.
        ci : CI vector(s) passed to ``mc.fcisolver.make_rdm12``.  Defaults to
            ``mc.ci``.
        atmlst : atoms to differentiate with respect to.  Defaults to all
            atoms of ``mc.mol``.
        mf_grad : mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        eris : integral object whose ``ppaa[i]`` and ``papa[i]`` blocks are
            read for every MO index ``i``.  Built with ``mc.ao2mo(mo_coeff)``
            when omitted.
        verbose : unused.

    Returns:
        Array of shape ``(len(atmlst), 3)``: the sum of the hcore, renorm and
        eri components.  No nuclear-repulsion term is added.

    Raises:
        NotImplementedError: ``mc.frozen`` is not None.
    '''

    # dmo = smoT.dao.smo
    # dao = mo.dmo.moT
    t0 = (logger.process_clock(), logger.perf_counter())

    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    if eris is None:
        # The ppaa/papa blocks are read unconditionally below, so the default
        # of None used to fail with AttributeError.
        # NOTE(review): assumes mc.ao2mo returns an object exposing ppaa and
        # papa (as PySCF CASSCF objects do) - confirm for other `mc` types.
        eris = mc.ao2mo(mo_coeff)

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: new 'effective' MO coefficients including contraction from the Lagrange multipliers
    moL_coeff = np.dot(mo_coeff, Lorb)
    s0_inv = np.dot(mo_coeff, mo_coeff.T)
    moL_core = moL_coeff[:, :ncore]
    moL_cas = moL_coeff[:, ncore:nocc]

    # MRH: these SHOULD be state-averaged! Use the actual sacasscf object!
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: new density matrix terms
    dmL_core = np.dot(moL_core, mo_core.T) * 2
    dmL_cas = reduce(np.dot, (moL_cas, casdm1, mo_cas.T))
    dmL_core += dmL_core.T
    dmL_cas += dmL_cas.T
    dm1 = dm_core + dm_cas
    dm1L = dmL_core + dmL_cas
    # MRH: wrap the integral instead of the density matrix.
    # g_prst*~d_qrst = (g_pust*L_ur + g_prut*L_us + g_prsu*L_ut)*d_qrst + g_prst*L_uq*d_urst
    #                = 'aapaL'_prst*d_qrst [ERI TERM 1]
    #                = 'aapa'_prst*L_uq*d_urst [ERI TERM 2]
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    aapaL = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        jbuf = eris.ppaa[i]
        kbuf = eris.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :].transpose(1, 2, 0)
        aapaL[:, :, i, :] += np.tensordot(jbuf, Lorb[:, ncore:nocc], axes=((0), (0)))
        kbuf = np.tensordot(kbuf, Lorb[:, ncore:nocc], axes=((1), (0))).transpose(1, 2, 0)
        aapaL[:, :, i, :] += kbuf + kbuf.transpose(1, 0, 2)
    # MRH: new vhf terms
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    vjL, vkL = mc._scf.get_jk(mol, (dmL_core, dmL_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    vhfL_c = vjL[0] - vkL[0] * .5
    vhfL_a = vjL[1] - vkL[1] * .5
    gfock = np.dot(h1, dm1L)  # h1e
    gfock += np.dot((vhf_c + vhf_a), dmL_core)  # core-core and active-core, 2nd 1RDM linked
    gfock += np.dot((vhfL_c + vhfL_a), dm_core)  # core-core and active-core, 1st 1RDM linked
    gfock += np.dot(vhfL_c, dm_cas)  # core-active, 1st 1RDM linked
    gfock += np.dot(vhf_c, dmL_cas)  # core-active, 2nd 1RDM linked
    gfock = np.dot(
        s0_inv, gfock)  # Definition in MO's; going (AO->MO->AO) incurs inverse ovlp
    # [ERI TERM 1]
    gfock += reduce(
        np.dot, (mo_coeff, np.einsum('uviw,uvtw->it', aapaL, casdm2), mo_cas.T))
    # [ERI TERM 2]
    gfock += reduce(
        np.dot, (mo_coeff, np.einsum('uviw,vuwt->it', aapa, casdm2), moL_cas.T))

    dme0 = (gfock + gfock.T) / 2  # This transpose is for the overlap matrix later on
    aapa = vj = vk = vhf_c = vhf_a = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    vhf1c, vhf1a, vhf1cL, vhf1aL = vj - vk * 0.5
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal (i == j) elements in lower-triangular packing
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # MRH: contract the final two indices of the active-active 2RDM with L as you change to AOs
    # note tensordot always puts indices in the order of the arguments.
    dm2Lbuf = np.zeros((ncas**2, nmo, nmo))
    # MRH: The second line below transposes the L; the third line transposes the derivative
    # Both the L and the derivative have to explore all indices
    Lcasdm2 = np.tensordot(Lorb[:, ncore:nocc], casdm2, axes=(1, 2)).transpose(1, 2, 0, 3)
    dm2Lbuf[:, :, ncore:nocc] = Lcasdm2.reshape(ncas**2, nmo, ncas)
    Lcasdm2 = np.tensordot(Lorb[:, ncore:nocc], casdm2, axes=(1, 3)).transpose(1, 2, 3, 0)
    dm2Lbuf[:, ncore:nocc, :] += Lcasdm2.reshape(ncas**2, ncas, nmo)
    Lcasdm2 = None
    dm2Lbuf += dm2Lbuf.transpose(0, 2, 1)
    dm2Lbuf = np.ascontiguousarray(dm2Lbuf)
    dm2Lbuf = ao2mo._ao2mo.nr_e2(dm2Lbuf.reshape(ncas**2, nmo**2), mo_coeff.T,
                                 (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    dm2Lbuf = lib.pack_tril(dm2Lbuf)
    dm2Lbuf[:, diag_idx] *= .5
    dm2Lbuf = dm2Lbuf.reshape(ncas, ncas, nao_pair)

    if atmlst is None:
        atmlst = list(range(mol.natm))
    aoslices = mol.aoslice_by_atom()
    # The three components are kept apart so each can be logged separately.
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # MRH: 3 components of eri array and 1 density matrix array:
    # FOUR arrays of this size are required!
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc, 'SA-CASSCF Lorb_dot_dgorb memory remaining for eri manipulation: %f MB; using'
        ' blocksize = %d', max_memory, blksize)
    t0 = logger.timer(mc, 'SA-CASSCF Lorb_dot_dgorb 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: h1e and Feff terms
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1L)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2Lbuf, mo_cas[p0:p1], mo_cas[q0:q1])
            # MRH: contract first two indices of active-active 2RDM with L as you go MOs -> AOs
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, moL_cas[p0:p1], mo_cas[q0:q1])
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], moL_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            # MRH: I still don't understand why there is a minus here!
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            t0 = logger.timer(
                mc, 'SA-CASSCF Lorb_dot_dgorb atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: core-core and core-active 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1L[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1cL[:, p0:p1], dm1[p0:p1]) * 2
        # MRH: active-core 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1], dmL_core[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1aL[:, p0:p1], dm_core[p0:p1]) * 2

    # MRH: deleted the nuclear-nuclear part to avoid double-counting
    # lesson learned from debugging - mol.intor computes -1 * the derivative and only
    # for one index
    # on the other hand, mf_grad.hcore_generator computes the actual derivative of
    # h1 for both indices and with the correct sign

    logger.debug(mc, "Orb lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "Orb lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "Orb lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri

    return de
def make_rdm1_with_orbital_response(mp):
    '''Build the AO-basis MP2 1-particle density matrix with orbital response.

    Args:
        mp : MP2 object.  The code reads ``mp.t2``, ``mp.mo_coeff``,
            ``mp.mo_occ``, ``mp.frozen``, ``mp.max_memory``, ``mp.mol`` and
            ``mp._scf``.

    Returns:
        ``(nao, nao)`` array: the MO-basis correction (``doo``/``dvv`` blocks,
        frozen-active blocks when orbitals are frozen, plus the result of
        ``_response_dm1``) transformed to the AO basis, added to
        ``mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)``.
    '''
    from pyscf import lib
    from pyscf.grad.mp2 import _response_dm1, _index_frozen_active, _shell_prange
    from pyscf.mp import mp2
    from pyscf.ao2mo import _ao2mo
    log = lib.logger.new_logger(mp)
    # time.clock() no longer exists on Python >= 3.8.  Use the logger's own
    # clock pair so the values handed back to log.timer* stay comparable.
    time0 = lib.logger.process_clock(), lib.logger.perf_counter()
    mol = mp.mol

    log.debug('Build mp2 rdm1 intermediates')
    d1 = mp2._gamma1_intermediates(mp, mp.t2)
    doo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)

    # "No frozen orbitals" is spelled either None or the integer 0.  Compare
    # by value: `frozen is 0` depends on CPython's small-int cache.
    frozen = mp.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mp.get_frozen_mask(), mp.mo_occ)
    orbo = mp.mo_coeff[:, OA]
    orbv = mp.mo_coeff[:, VA]
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    part_dm2 = _ao2mo.nr_e2(mp.t2.reshape(nocc**2, nvir**2),
                            numpy.asarray(orbv.T, order='F'), (0, nao, 0, nao),
                            's1', 's1').reshape(nocc, nocc, nao, nao)
    part_dm2 = (part_dm2.transpose(0, 2, 3, 1) * 4 -
                part_dm2.transpose(0, 3, 2, 1) * 2)

    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal (i == j) elements in lower-triangular packing
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    Imat = numpy.zeros((nao, nao))

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory * .9e6 / 8 / (nao**3 * 2.5)))

    for ia in range(mol.natm):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            # Finish the occupied-index back-transformation for this AO block
            dm2buf = lib.einsum('pi,iqrj->pqrj', orbo[ip0:ip1], part_dm2)
            dm2buf += lib.einsum('qi,iprj->pqrj', orbo, part_dm2[:, ip0:ip1])
            dm2buf = lib.einsum('pqrj,sj->pqrs', dm2buf, orbo)
            dm2buf = dm2buf + dm2buf.transpose(0, 1, 3, 2)
            dm2buf = lib.pack_tril(dm2buf.reshape(-1, nao, nao)).reshape(nf, nao, -1)
            dm2buf[:, :, diagidx] *= .5

            shls_slice = (b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf, nao, -1), dm2buf)
            eri0 = None
            dm2buf = None
        time1 = log.timer_debug1('2e-part grad of atom %d' % ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in ccsd_grad module.
    mo_coeff = mp.mo_coeff
    mo_energy = mp._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mp.mo_occ > 0)
    Imat = reduce(numpy.dot, (mo_coeff.T, Imat, mp._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo, nmo))
    if with_frozen:
        dco = Imat[OF[:, None], OA] / (mo_energy[OF, None] - mo_energy[OA])
        dfv = Imat[VF[:, None], VA] / (mo_energy[VF, None] - mo_energy[VA])
        dm1mo[OA[:, None], OA] = doo + doo.T
        dm1mo[OF[:, None], OA] = dco
        dm1mo[OA[:, None], OF] = dco.T
        dm1mo[VA[:, None], VA] = dvv + dvv.T
        dm1mo[VF[:, None], VA] = dfv
        dm1mo[VA[:, None], VF] = dfv.T
    else:
        dm1mo[:nocc, :nocc] = doo + doo.T
        dm1mo[nocc:, nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mp._scf.get_veff(mp.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:, nocc:].T, vhf, mo_coeff[:, :nocc]))
    Xvo += Imat[:nocc, nocc:].T - Imat[nocc:, :nocc]

    dm1mo += _response_dm1(mp, Xvo)

    # Transform to AO basis
    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    dm1 += mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)
    return dm1
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Nuclear gradients for a CASSCF-type object ``mc``.

    Args:
        mc : object providing ``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``_scf``, ``get_hcore`` and ``max_memory``.
            ``mc.frozen`` must be None.
        mo_coeff : MO coefficients.  Defaults to ``mc.mo_coeff``.
        ci : CI vector passed to ``mc.fcisolver.make_rdm12``.  Defaults to
            ``mc.ci``.
        atmlst : atoms to differentiate with respect to.  Defaults to
            ``range(mol.natm)``.
        mf_grad : mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose : unused.

    Returns:
        Array of shape ``(len(atmlst), 3)``.  The result of
        ``mf_grad.grad_nuc(mol, atmlst)`` is already added.

    Raises:
        NotImplementedError: ``mc.frozen`` is not None.
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nelecas = mc.nelecas
    nocc = ncore + ncas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    act = slice(ncore, nocc)  # active-orbital window inside the occupied block

    orb_occ = mo_coeff[:, :nocc]
    orb_core = mo_coeff[:, :ncore]
    orb_act = mo_coeff[:, act]

    rdm1_act, rdm2_act = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # AO-basis core and active 1-particle densities
    dm_core = numpy.dot(orb_core, orb_core.T) * 2
    dm_cas = numpy.dot(numpy.dot(orb_act, rdm1_act), orb_act.T)
    dm1 = dm_core + dm_cas

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    aapa = ao2mo.kernel(mol, (orb_act, orb_act, orb_occ, orb_act), compact=False)
    aapa = aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    fock_core = h1 + vhf_c
    gfock = numpy.dot(numpy.dot(orb_occ.T, fock_core + vhf_a), orb_occ) * 2
    # The active columns are replaced by the RDM-contracted expressions
    gfock[:, act] = numpy.dot(
        numpy.dot(numpy.dot(orb_occ.T, fock_core), orb_act), rdm1_act)
    gfock[:, act] += numpy.einsum('uviw,vuwt->it', aapa, rdm2_act)
    # Symmetrised generalized Fock matrix in the AO basis; contracted with the
    # overlap derivative in the atom loop below
    dme0 = numpy.dot(numpy.dot(orb_occ, (gfock + gfock.T) * .5), orb_occ.T)
    del aapa, vj, vk, vhf_c, vhf_a, h1, fock_core, gfock

    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal (i == j) elements in lower-triangular packing
    tril_diag = numpy.arange(nao)
    tril_diag = tril_diag * (tril_diag + 1) // 2 + tril_diag
    rdm2_sym = rdm2_act + rdm2_act.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(rdm2_sym.reshape(ncas**2, ncas**2), orb_act.T,
                                (0, nao, 0, nao))
    dm2buf = dm2buf.reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, tril_diag] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    del rdm2_act, rdm2_sym

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Size the shell batches from the memory left and the widest atomic AO range
    max_memory = mc.max_memory - lib.current_memory()[0]
    widest_atom = (aoslices[:, 3] - aoslices[:, 2]).max()
    blksize = int(max_memory * .9e6 / 8 / (widest_atom * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        atom_ao = slice(p0, p1)
        grad_k = de[k]  # view: the in-place updates below write into `de`

        # One-electron and overlap-derivative terms
        grad_k += numpy.einsum('xij,ij->x', hcore_deriv(ia), dm1)
        grad_k -= numpy.einsum('xij,ij->x', s1[:, atom_ao], dme0[atom_ao]) * 2

        # Active-space 2-RDM contracted with derivative integrals, in batches
        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0 = q1
            q1 = q0 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, orb_act[atom_ao],
                                orb_act[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice)
            eri1 = eri1.reshape(3, p1 - p0, nf, nao_pair)
            grad_k -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None

        # Mean-field-like terms built from the core and active densities
        grad_k += numpy.einsum('xij,ij->x', vhf1c[:, atom_ao], dm1[atom_ao]) * 2
        grad_k += numpy.einsum('xij,ij->x', vhf1a[:, atom_ao], dm_core[atom_ao]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de