Exemplo n.º 1
0
def kernel(mycc,
           t1=None,
           t2=None,
           l1=None,
           l2=None,
           eris=None,
           atmlst=None,
           mf_grad=None,
           d1=None,
           d2=None,
           verbose=logger.INFO):
    """Compute nuclear gradients from CCSD amplitudes and density intermediates.

    Args:
        mycc: Coupled-cluster object. Supplies default amplitudes, ``mol``,
            ``mo_coeff``, ``mo_occ``, ``frozen``, ``max_memory`` and the
            underlying mean-field object ``_scf``.
        t1, t2: Amplitudes; default to ``mycc.t1`` / ``mycc.t2``.
        l1, l2: Default to ``mycc.l1`` / ``mycc.l2`` (the lambda amplitudes,
            going by the attribute names).
        eris: Optional integral object. When given, its ``fock`` matrix must
            be diagonal to within 1e-3. It is also forwarded to
            ``_response_dm1``.
        atmlst: Atom indices to differentiate; defaults to all atoms of
            ``mycc.mol``.
        mf_grad: Mean-field gradient object; defaults to
            ``mycc._scf.nuc_grad_method()``.
        d1: Optional precomputed rdm1 intermediates ``(doo, dov, dvo, dvv)``.
        d2: Optional precomputed rdm2 intermediates.
        verbose: Verbosity passed to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``; includes the nuclear term added
        via ``mf_grad.grad_nuc``.

    Raises:
        RuntimeError: If ``eris`` is given and ``eris.fock`` has an
            off-diagonal element larger than 1e-3 in magnitude.
    """
    if eris is not None:
        if abs(eris.fock - numpy.diag(eris.fock.diagonal())).max() > 1e-3:
            raise RuntimeError(
                'CCSD gradients does not support NHF (non-canonical HF)')

    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if l1 is None: l1 = mycc.l1
    if l2 is None: l2 = mycc.l2
    if mf_grad is None: mf_grad = mycc._scf.nuc_grad_method()

    log = logger.new_logger(mycc, verbose)
    # time.clock() was removed in Python 3.8.  Keep the historical clock pair
    # where it still exists and fall back to the modern clocks otherwise.
    # NOTE(review): the pair should match the clocks used by log.timer();
    # confirm against the installed logger.
    if hasattr(time, 'clock'):
        time0 = time.clock(), time.time()
    else:
        time0 = time.process_time(), time.perf_counter()

    log.debug('Build ccsd rdm1 intermediates')
    if d1 is None:
        d1 = ccsd_rdm._gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ccsd rdm2 intermediates')
    # Temporary HDF5 file: holds the rdm2 intermediates (when built here),
    # the AO-basis 'dm2' read back below, and the per-atom 'vhf1' dataset.
    fdm2 = lib.H5TmpFile()
    if d2 is None:
        d2 = ccsd_rdm._gamma2_outcore(mycc, t1, t2, l1, l2, fdm2, True)
    time1 = log.timer_debug1('rdm2 intermediates', *time1)

    mol = mycc.mol
    mo_coeff = mycc.mo_coeff
    mo_energy = mycc._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mycc.mo_occ > 0)
    # "No frozen orbitals" is either None or the integer 0.  Compare by value
    # (not identity) and only for integer types, so list/array specifications
    # are never compared element-wise.
    frozen = mycc.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mycc.get_frozen_mask(), mycc.mo_occ)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    # Roughly, dm2*2 is computed in _rdm2_mo2ao
    mo_active = mo_coeff[:, numpy.hstack((OA, VA))]
    _rdm2_mo2ao(mycc, d2, mo_active, fdm2)  # transform the active orbitals
    time1 = log.timer_debug1('MO->AO transformation', *time1)
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements in lower-triangular packed storage.
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    de = numpy.zeros((len(atmlst), 3))
    Imat = numpy.zeros((nao, nao))
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst), 3, nao, nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory * .9e6 / 8 / (nao**3 * 2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((3, nao, nao))
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = _load_block_tril(fdm2['dm2'], ip0, ip1, nao)
            # Halve the packed diagonal before contracting with the
            # s2kl-packed integrals.
            dm2buf[:, :, diagidx] *= .5
            shls_slice = (b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf, nao, -1),
                               dm2buf)
            eri0 = None  # release before allocating the derivative integrals

            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(3, nf, nao, -1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            dm2buf = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf * nao, -1))
                eri1tmp = eri1tmp.reshape(nf, nao, nao, nao)
                vhf[i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[i] -= numpy.einsum('ijkl,il->kj', eri1tmp,
                                       hf_dm1[ip0:ip1]) * .5
                vhf[i, ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[i, ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp,
                                                hf_dm1) * .5
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia),
                  de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d' % ia, *time1)

    # Transform the accumulated AO matrix to the MO basis (with the overlap
    # on the right) and flip its sign.
    Imat = reduce(numpy.dot,
                  (mo_coeff.T, Imat, mycc._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo, nmo))
    if with_frozen:
        # Frozen/active blocks are obtained from Imat divided by the
        # orbital-energy differences.
        dco = Imat[OF[:, None], OA] / (mo_energy[OF, None] - mo_energy[OA])
        dfv = Imat[VF[:, None], VA] / (mo_energy[VF, None] - mo_energy[VA])
        dm1mo[OA[:, None], OA] = doo + doo.T
        dm1mo[OF[:, None], OA] = dco
        dm1mo[OA[:, None], OF] = dco.T
        dm1mo[VA[:, None], VA] = dvv + dvv.T
        dm1mo[VF[:, None], VA] = dfv
        dm1mo[VA[:, None], VF] = dfv.T
    else:
        dm1mo[:nocc, :nocc] = doo + doo.T
        dm1mo[nocc:, nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mycc._scf.get_veff(mycc.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:, nocc:].T, vhf, mo_coeff[:, :nocc]))
    Xvo += Imat[:nocc, nocc:].T - Imat[nocc:, :nocc]

    dm1mo += _response_dm1(mycc, Xvo, eris)
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imat[nocc:, :nocc] = Imat[:nocc, nocc:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:, :nocc] = mo_energy[:nocc]
    zeta[:nocc, nocc:] = mo_energy[:nocc].reshape(-1, 1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta * dm1mo, mo_coeff.T))

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # Projector onto the occupied orbitals.  Named distinctly so it is not
    # confused with the AO offset ``p1`` unpacked in the atom loop below.
    occ_proj = numpy.dot(mo_coeff[:, :nocc], mo_coeff[:, :nocc].T)
    vhf_s1occ = reduce(numpy.dot,
                       (occ_proj, mycc._scf.get_veff(mol, dm1 + dm1.T),
                        occ_proj))
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1p = hf_dm1 + dm1 * 2
    dm1 += hf_dm1
    zeta += mf_grad.make_rdm1e(mo_energy, mo_coeff, mycc.mo_occ)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:, p0:p1], im1[:, p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1])
        de[k] -= numpy.einsum('xji,ij->x', s1[:, p0:p1], zeta[:, p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k], dm1p)

    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mycc.__class__.__name__, *time0)
    return de
Exemplo n.º 2
0
def kernel(mc,
           mo_coeff=None,
           ci=None,
           atmlst=None,
           mf_grad=None,
           verbose=None):
    """Nuclear gradients built from the CAS 1- and 2-particle density matrices.

    Args:
        mc: Multiconfigurational object providing ``mol``, ``ncore``,
            ``ncas``, ``nelecas``, ``fcisolver``, ``max_memory``,
            ``get_hcore()`` and the mean-field object ``_scf``.
        mo_coeff: Orbital coefficients; defaults to ``mc.mo_coeff``.
        ci: CI vector handed to ``mc.fcisolver.make_rdm12``; defaults to
            ``mc.ci``.
        atmlst: Atom indices to differentiate; defaults to every atom.
        mf_grad: Mean-field gradient object; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(atmlst), 3)`` including the nuclear term from
        ``rhf_grad.grad_nuc``.

    Raises:
        NotImplementedError: If ``mc.frozen`` is not None.
    """
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nelecas = mc.nelecas
    nocc = ncore + ncas
    nao, _nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    # Column blocks of the orbital matrix: core+active, core, active.
    orb_occ = mo_coeff[:, :nocc]
    orb_core = mo_coeff[:, :ncore]
    orb_act = mo_coeff[:, ncore:nocc]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(orb_core, orb_core.T) * 2
    dm_cas = numpy.dot(numpy.dot(orb_act, casdm1), orb_act.T)
    aapa = ao2mo.kernel(mol, (orb_act, orb_act, orb_occ, orb_act),
                        compact=False)
    aapa = aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    fock_core = h1 + vhf_c
    gfock = numpy.dot(numpy.dot(orb_occ.T, fock_core + vhf_a), orb_occ) * 2
    # The active columns are overwritten with the density-weighted form.
    gfock[:, ncore:nocc] = numpy.dot(
        numpy.dot(numpy.dot(orb_occ.T, fock_core), orb_act), casdm1)
    gfock[:, ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = numpy.dot(numpy.dot(orb_occ, (gfock + gfock.T) * .5), orb_occ.T)
    # Drop the large temporaries before the memory estimate further down.
    del aapa, vj, vk, vhf_c, vhf_a, h1, fock_core, gfock

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Indices of the diagonal elements in lower-triangular packed storage.
    ao_idx = numpy.arange(nao)
    tril_diag = ao_idx * (ao_idx + 1) // 2 + ao_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    ncas2 = ncas * ncas
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas2, ncas2), orb_act.T,
                                (0, nao, 0, nao))
    dm2buf = lib.pack_tril(dm2buf.reshape(ncas2, nao, nao))
    dm2buf[:, tril_diag] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    del casdm2, casdm2_cc

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Size the AO blocks of the second index from the remaining memory.
    avail_mem = mc.max_memory - lib.current_memory()[0]
    widest_atom = (aoslices[:, 3] - aoslices[:, 2]).max()
    blksize = int(avail_mem * .9e6 / 8 / (widest_atom * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        grad = de[k]  # view: in-place updates land in ``de``
        grad += numpy.einsum('xij,ij->x', hcore_deriv(ia), dm1)
        grad -= numpy.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        ao_stop = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            ao_start, ao_stop = ao_stop, ao_stop + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, orb_act[p0:p1],
                                orb_act[ao_start:ao_stop])
            eri1 = mol.intor(
                'int2e_ip1',
                comp=3,
                aosym='s2kl',
                shls_slice=(shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas))
            eri1 = eri1.reshape(3, p1 - p0, nf, nao_pair)
            grad -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            del eri1
        grad += numpy.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1[p0:p1]) * 2
        grad += numpy.einsum('xij,ij->x', vhf1a[:, p0:p1], dm_core[p0:p1]) * 2

    de += rhf_grad.grad_nuc(mol, atmlst)
    return de
Exemplo n.º 3
0
def kernel(mc,
           mo_coeff=None,
           ci=None,
           atmlst=None,
           mf_grad=None,
           verbose=None):
    """Nuclear gradients for a CAS wavefunction built on mean-field orbitals.

    The orbitals and orbital energies come from ``mc._scf``. The orbital
    response is obtained from a z-vector whose virtual-occupied block is
    solved with ``cphf.solve``.

    Args:
        mc: Multiconfigurational object providing ``mol``, ``ncore``,
            ``ncas``, ``nelecas``, ``fcisolver``, ``max_memory``,
            ``get_hcore()`` and the mean-field object ``_scf``.
        mo_coeff: Orbital coefficients; defaults to ``mc._scf.mo_coeff``.
        ci: CI vector (must be a ``numpy.ndarray``); defaults to ``mc.ci``.
        atmlst: Atom indices to differentiate; defaults to every atom.
        mf_grad: Mean-field gradient object; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(atmlst), 3)`` including the nuclear term from
        ``rhf_grad.grad_nuc``.

    Raises:
        AssertionError: If ``ci`` is not a ``numpy.ndarray`` or if the
            molecule's alpha and beta electron counts differ.
    """
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    assert (isinstance(ci, numpy.ndarray))

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]
    neleca, nelecb = mol.nelec
    assert (neleca == nelecb)
    # Occupied / virtual split of the mean-field determinant.
    orbo = mo_coeff[:, :neleca]
    orbv = mo_coeff[:, neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nmo, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo, nmo))
    Imat[:, :nocc] = reduce(numpy.dot,
                            (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active columns are overwritten with the density-weighted form.
    Imat[:, ncore:nocc] = reduce(numpy.dot,
                                 (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:, ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # z-vector blocks that couple orbitals on the same side of the
    # occupied/virtual split (core <-> active-occupied and
    # active-virtual <-> external) follow directly from Imat divided by the
    # orbital-energy differences.
    ee = mo_energy[:, None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    zvec[:ncore, ncore:neleca] = (Imat[:ncore, ncore:neleca] /
                                  -ee[:ncore, ncore:neleca])
    zvec[ncore:neleca, :ncore] = (Imat[ncore:neleca, :ncore] /
                                  -ee[ncore:neleca, :ncore])
    zvec[nocc:, neleca:nocc] = (Imat[nocc:, neleca:nocc] /
                                -ee[nocc:, neleca:nocc])
    zvec[neleca:nocc, nocc:] = (Imat[neleca:nocc, nocc:] /
                                -ee[neleca:nocc, nocc:])

    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:, :neleca] - Imat[:neleca, neleca:].T

    def fvind(x):
        """Response of the virtual-occupied block to a trial vector ``x``."""
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2

    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo,
                         max_cycle=30)[0]
    zvec[neleca:, :neleca] = dm1resp

    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    # Rebuild the AO z-vector now that the virtual-occupied block is filled.
    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec + zvec.T, mo_coeff.T))
    p1 = numpy.dot(mo_coeff[:, :neleca], mo_coeff[:, :neleca].T)
    vhf_s1occ = reduce(numpy.dot, (p1, mc._scf.get_veff(mol, zvec_ao), p1))

    # Zero the Imat blocks already consumed by the z-vector above, then
    # mirror the occupied-virtual block.
    Imat[:ncore, ncore:neleca] = 0
    Imat[ncore:neleca, :ncore] = 0
    Imat[nocc:, neleca:nocc] = 0
    Imat[neleca:nocc, nocc:] = 0
    Imat[neleca:, :neleca] = Imat[:neleca, neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Indices of the diagonal elements in lower-triangular packed storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Size the AO blocks of the second index from the remaining memory.
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape((p1 - p0) * nf, -1))
                eri1tmp = eri1tmp.reshape(p1 - p0, nf, nao, nao)
                # Coulomb- and exchange-type contractions between the
                # mean-field density and the z-vector density.
                de[k, i] -= numpy.einsum('ijkl,ij,kl', eri1tmp,
                                         hf_dm1[p0:p1, q0:q1], zvec_ao) * 2
                de[k, i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1,
                                         zvec_ao[p0:p1, q0:q1]) * 2
                de[k, i] += numpy.einsum('ijkl,il,kj', eri1tmp, hf_dm1[p0:p1],
                                         zvec_ao[q0:q1])
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp, hf_dm1[q0:q1],
                                         zvec_ao[p0:p1])

                # The four contractions below are the blocked equivalent of:
                #:vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_core[q0:q1],
                                         casci_dm1[p0:p1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_core[q0:q1],
                                         casci_dm1[p0:p1])
                de[k, i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_cas[q0:q1],
                                         dm_core[p0:p1]) * 2
                de[k, i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_cas[q0:q1],
                                         dm_core[p0:p1])
            eri1 = eri1tmp = None

        # Overlap-derivative terms; im1 is contracted on both index orders.
        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], im1[:, p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1], zeta[:, p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:, p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:, p0:p1],
                              vhf_s1occ[:, p0:p1]) * 2

    de += rhf_grad.grad_nuc(mol, atmlst)
    return de
Exemplo n.º 4
0
def grad_elec(mc_grad, mo_coeff=None, ci=None, atmlst=None, verbose=None):
    """Electronic part of the nuclear gradients from the CAS density matrices.

    Unlike the ``kernel`` variants, no nuclear-repulsion term is added here.

    Args:
        mc_grad: Gradient object. ``mc_grad.base`` is the multiconfigurational
            object; ``mc_grad`` itself supplies ``mol``, ``max_memory``,
            ``get_jk``, ``hcore_generator`` and ``get_ovlp``.
        mo_coeff: Orbital coefficients; defaults to ``mc_grad.base.mo_coeff``.
        ci: CI vector; defaults to ``mc_grad.base.ci``.
        atmlst: Atom indices to differentiate; defaults to every atom.
        verbose: Verbosity passed to ``logger.new_logger``.

    Returns:
        Array of shape ``(len(atmlst), 3)``.

    Raises:
        NotImplementedError: If ``mc_grad.base.frozen`` is not None.
    """
    mc = mc_grad.base
    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mc.frozen is not None:
        raise NotImplementedError

    # time.clock() was removed in Python 3.8.  Keep the historical clock pair
    # where it still exists and fall back to the modern clocks otherwise.
    # NOTE(review): the pair should match the clocks used by log.timer();
    # confirm against the installed logger.
    if hasattr(time, 'clock'):
        time0 = time.clock(), time.time()
    else:
        time0 = time.process_time(), time.perf_counter()
    log = logger.new_logger(mc_grad, verbose)
    mol = mc_grad.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2

    # Necessary kludge because gfock isn't zero in occ-virt space in SA-CASSCF
    # Among many other potential applications!
    # When the tag is present and truthy, the generalized Fock matrix is
    # built over all nmo orbitals instead of only the core+active ones.
    if getattr(mc, '_tag_gfock_ov_nonzero', False):
        nocc = nmo

    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    # Sliced with ncore+ncas explicitly because nocc may have been widened.
    mo_cas = mo_coeff[:,ncore:ncore+ncas]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH flag: this is one of my kludges
    # It would be better to just pass the ERIS object used in orbital optimization
    # But I am too lazy at the moment
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nocc,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Only the core and active columns are filled; any remaining columns
    # (present when nocc was widened to nmo) stay zero.
    gfock = numpy.zeros ((nocc, nocc))
    gfock[:,:ncore] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_core)) * 2
    gfock[:,ncore:ncore+ncas] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:,ncore:ncore+ncas] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(numpy.dot, (mo_occ, (gfock+gfock.T)*.5, mo_occ.T))
    # Release the large temporaries before the memory estimate further down.
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vj, vk = mc_grad.get_jk(mol, (dm_core, dm_cas))
    vhf1c, vhf1a = vj - vk * .5
    hcore_deriv = mc_grad.hcore_generator(mol)
    s1 = mc_grad.get_ovlp(mol)

    # Indices of the diagonal elements in lower-triangular packed storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    max_memory = mc_grad.max_memory - lib.current_memory()[0]
    # MRH: this originally implied that the memory footprint would be max(p1-p0) * max(q1-q0) * nao_pair
    # In fact, that's the size of dm2_ao AND EACH COMPONENT of the differentiated eris
    # So the actual memory footprint is 4 times that!
    blksize = int(max_memory*.9e6/8 / (4*(aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2

    log.timer('CASSCF nuclear gradients', *time0)
    return de
Exemplo n.º 5
0
def kernel(mc,
           mo_coeff=None,
           ci=None,
           atmlst=None,
           mf_grad=None,
           verbose=None):
    """Nuclear gradients with an explicitly constructed orbital response.

    The first part accumulates the density-matrix contractions with the
    derivative integrals. The second part builds, for every atom and
    Cartesian direction, a first-order orbital coefficient matrix ``c1`` and
    contracts it with the full MO-basis one- and two-particle density
    matrices. All four-index quantities are held in memory as dense
    ``nmo**4`` / ``nao**4`` arrays.

    Args:
        mc: Multiconfigurational object providing ``mol``, ``ncore``,
            ``ncas``, ``nelecas``, ``fcisolver``, ``max_memory`` and the
            mean-field object ``_scf``.
        mo_coeff: Orbital coefficients; defaults to ``mc._scf.mo_coeff``.
        ci: CI vector handed to ``mc.fcisolver.make_rdm12``; defaults to
            ``mc.ci``.
        atmlst: Ignored. The result always covers every atom of ``mc.mol``,
            because the response loop iterates over all atoms.
        mf_grad: Mean-field gradient object; defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(mol.natm, 3)`` including the nuclear term from
        ``rhf_grad.grad_nuc``.
    """
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    mo_energy = mc._scf.mo_energy

    hcore_deriv = mf_grad.hcore_generator(mol)
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # Use the resolved ``ci`` argument so a caller-supplied vector is honored.
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))

    # AO-basis one-particle density; rebound to the MO-basis one further down.
    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))

    # Indices of the diagonal elements in lower-triangular packed storage.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    # casdm2 is still needed for the MO-basis dm2 below, so it is kept alive.

    atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Size the AO blocks of the second index from the remaining memory.
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:, p0:p1], dm_core[p0:p1]) * 2

    # Full MO-basis two-particle density: core-core, core-active and
    # active-active blocks.
    dm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    for i in range(ncore):
        for j in range(ncore):
            dm2[i, i, j, j] += 4
            dm2[i, j, j, i] -= 2
        dm2[i, i, ncore:nocc, ncore:nocc] = casdm1 * 2
        dm2[ncore:nocc, ncore:nocc, i, i] = casdm1 * 2
        dm2[i, ncore:nocc, ncore:nocc, i] = -casdm1
        dm2[ncore:nocc, i, i, ncore:nocc] = -casdm1
    dm2[ncore:nocc, ncore:nocc, ncore:nocc, ncore:nocc] = casdm2

    # Full MO-basis one-particle density.
    dm1 = numpy.zeros((nmo, nmo))
    for i in range(ncore):
        dm1[i, i] = 2
    dm1[ncore:nocc, ncore:nocc] = casdm1

    neleca, nelecb = mol.nelec

    # Derivative integrals: dense AO arrays, with the last three indices of
    # the two-electron derivative transformed to the MO basis.
    h1 = -(mol.intor('int1e_ipkin', comp=3) + mol.intor('int1e_ipnuc', comp=3))
    s1 = -mol.intor('int1e_ipovlp', comp=3)
    eri1 = mol.intor('int2e_ip1', comp=3).reshape(3, nao, nao, nao, nao)
    eri1 = numpy.einsum('xipkl,pj->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijpl,pk->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijkp,pl->xijkl', eri1, mo_coeff)
    h0 = reduce(numpy.dot, (mo_coeff.T, mc._scf.get_hcore(), mo_coeff))
    g0 = ao2mo.restore(1, ao2mo.full(mol, mo_coeff), nmo)

    def hess():
        """Return minus the virtual-occupied response matrix, flattened 2D."""
        nocc = mol.nelectron // 2
        nvir = nmo - nocc
        eri_mo = g0
        eai = lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
        h = eri_mo[nocc:, :nocc, nocc:, :nocc] * 4
        h -= numpy.einsum('cdlk->ckdl', eri_mo[nocc:, nocc:, :nocc, :nocc])
        h -= numpy.einsum('cldk->ckdl', eri_mo[nocc:, :nocc, nocc:, :nocc])
        for a in range(nvir):
            for i in range(nocc):
                h[a, i, a, i] += eai[a, i]
        return -h.reshape(nocc * nvir, -1)

    hh = hess()
    ee = mo_energy[:, None] - mo_energy

    for k, (sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()):
        mol.set_rinv_origin(mol.atom_coord(k))
        vrinv = -mol.atom_charge(k) * mol.intor('int1e_iprinv', comp=3)

        # 2e AO integrals dot 2pdm
        for i in range(3):
            # First-order two-electron integrals, symmetrized over all four
            # index positions.
            g1 = numpy.einsum('pjkl,pi->ijkl', eri1[i, p0:p1], mo_coeff[p0:p1])
            g1 = g1 + g1.transpose(1, 0, 2, 3)
            g1 = g1 + g1.transpose(2, 3, 0, 1)
            g1 *= -1
            hx = (numpy.einsum('pq,pi,qj->ij', h1[i, p0:p1], mo_coeff[p0:p1],
                               mo_coeff) +
                  reduce(numpy.dot, (mo_coeff.T, vrinv[i], mo_coeff)))
            hx = hx + hx.T
            sx = numpy.einsum('pq,pi,qj->ij', s1[i, p0:p1], mo_coeff[p0:p1],
                              mo_coeff)
            sx = sx + sx.T

            fij = (hx[:neleca, :neleca] - numpy.einsum(
                'ij,j->ij', sx[:neleca, :neleca], mo_energy[:neleca]) -
                   numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                g0[:neleca, :neleca, :neleca, :neleca]) * 2 +
                   numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                g0[:neleca, :neleca, :neleca, :neleca]) +
                   numpy.einsum('ijkk->ij',
                                g1[:neleca, :neleca, :neleca, :neleca]) * 2 -
                   numpy.einsum('ikkj->ij',
                                g1[:neleca, :neleca, :neleca, :neleca]))

            fab = (hx[neleca:, neleca:] - numpy.einsum(
                'ij,j->ij', sx[neleca:, neleca:], mo_energy[neleca:]) -
                   numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                g0[neleca:, neleca:, :neleca, :neleca]) * 2 +
                   numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                g0[neleca:, :neleca, :neleca, neleca:]) +
                   numpy.einsum('ijkk->ij',
                                g1[neleca:, neleca:, :neleca, :neleca]) * 2 -
                   numpy.einsum('ikkj->ij', g1[neleca:, :neleca, :neleca,
                                               neleca:]))

            fai = (hx[neleca:, :neleca] - numpy.einsum(
                'ai,i->ai', sx[neleca:, :neleca], mo_energy[:neleca]) -
                   numpy.einsum('kl,ijlk->ij', sx[:neleca, :neleca],
                                g0[neleca:, :neleca, :neleca, :neleca]) * 2 +
                   numpy.einsum('kl,iklj->ij', sx[:neleca, :neleca],
                                g0[neleca:, :neleca, :neleca, :neleca]) +
                   numpy.einsum('ijkk->ij',
                                g1[neleca:, :neleca, :neleca, :neleca]) * 2 -
                   numpy.einsum('ikkj->ij',
                                g1[neleca:, :neleca, :neleca, :neleca]))
            # First-order orbital coefficients: the diagonal blocks come from
            # the overlap derivative, the virtual-occupied block from the
            # linear solve.
            c1 = numpy.zeros((nmo, nmo))
            c1[:neleca, :neleca] = -.5 * sx[:neleca, :neleca]
            c1[neleca:, neleca:] = -.5 * sx[neleca:, neleca:]
            cvo1 = numpy.linalg.solve(hh, fai.ravel()).reshape(-1, neleca)
            cov1 = -(sx[neleca:, :neleca] + cvo1).T
            c1[neleca:, :neleca] = cvo1
            c1[:neleca, neleca:] = cov1
            v1 = numpy.einsum('pqai,ai->pq', g0[:, :, neleca:, :neleca],
                              cvo1) * 4
            v1 -= numpy.einsum('paiq,ai->pq', g0[:, neleca:, :neleca, :], cvo1)
            v1 -= numpy.einsum('piaq,ai->pq', g0[:, :neleca, neleca:, :], cvo1)
            fij += v1[:neleca, :neleca]
            fab += v1[neleca:, neleca:]
            # Overwrite the core/active-occupied and active-virtual/external
            # blocks with the first-order Fock elements divided by the
            # orbital-energy differences.
            c1[:ncore,
               ncore:neleca] = -fij[:ncore, ncore:] / ee[:ncore, ncore:neleca]
            c1[ncore:neleca, :ncore] = -fij[ncore:, :ncore] / ee[
                ncore:neleca, :ncore]
            m = nocc - neleca
            c1[nocc:, neleca:nocc] = -fab[m:, :m] / ee[nocc:, neleca:nocc]
            c1[neleca:nocc, nocc:] = -fab[:m, m:] / ee[neleca:nocc, nocc:]
            h0c1 = h0.dot(c1)
            h0c1 = h0c1 + h0c1.T
            g0c1 = numpy.einsum('pjkl,pi->ijkl', g0, c1)
            g0c1 = g0c1 + g0c1.transpose(1, 0, 2, 3)
            g0c1 = g0c1 + g0c1.transpose(2, 3, 0, 1)

            de[k, i] += numpy.einsum('ij,ji', h0c1, dm1)
            de[k, i] += numpy.einsum('ijkl,jilk', g0c1, dm2) * .5

    de += rhf_grad.grad_nuc(mol)
    return de
Exemplo n.º 6
0
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None, verbose=None):
    '''Nuclear gradients of a CAS energy evaluated on closed-shell HF orbitals.

    The routine builds the active-space 1- and 2-particle density matrices,
    forms the intermediate ``Imat``, obtains the orbital-response (z-vector)
    contribution by energy denominators and a CPHF solve, and finally
    contracts everything with the derivative integrals atom by atom.

    Args:
        mc: CAS object.  This function reads ``mol``, ``ncore``, ``ncas``,
            ``nelecas``, ``ci``, ``fcisolver``, ``get_hcore``, ``max_memory``
            and the underlying mean-field object ``_scf`` from it.
            NOTE(review): presumably a CASCI object (the density is named
            ``casci_dm1``) -- confirm against the caller.
        mo_coeff: MO coefficients of shape (nao, nmo).  Defaults to
            ``mc._scf.mo_coeff``.
        ci: CI vector; must be a ``numpy.ndarray``.  Defaults to ``mc.ci``.
        atmlst: iterable of atom indices.  Defaults to all atoms of ``mol``.
        mf_grad: mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: not used by this function.

    Returns:
        ``numpy.ndarray`` of shape (len(atmlst), 3): the gradient including
        the nuclear-repulsion term from ``mf_grad.grad_nuc``.

    Raises:
        AssertionError: if ``ci`` is not an ndarray or if the numbers of
            alpha and beta electrons of ``mol`` differ.
    '''
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    assert(isinstance(ci, numpy.ndarray))

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    mo_energy = mc._scf.mo_energy

    # Orbital subsets: core+active, core only, active only.
    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]
    # HF occupied / virtual partition (closed shell is required).
    neleca, nelecb = mol.nelec
    assert(neleca == nelecb)
    orbo = mo_coeff[:,:neleca]
    orbv = mo_coeff[:,neleca:]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nmo,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # Imat = h1_{pi} gamma1_{iq} + h2_{pijk} gamma_{iqkj}
    Imat = numpy.zeros((nmo,nmo))
    Imat[:,:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    # The active columns are overwritten with the casdm1/casdm2-weighted form.
    Imat[:,ncore:nocc] = reduce(numpy.dot, (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    Imat[:,ncore:nocc] += lib.einsum('uviw,vuwt->it', aapa, casdm2)
    # Drop the references to the large intermediates.
    aapa = vj = vk = vhf_c = vhf_a = h1 = None

    # z-vector blocks that do not mix HF-occupied with HF-virtual orbitals
    # (core <-> active below neleca, active above neleca <-> external) are
    # obtained directly from orbital-energy denominators.
    ee = mo_energy[:,None] - mo_energy
    zvec = numpy.zeros_like(Imat)
    zvec[:ncore,ncore:neleca] = Imat[:ncore,ncore:neleca] / -ee[:ncore,ncore:neleca]
    zvec[ncore:neleca,:ncore] = Imat[ncore:neleca,:ncore] / -ee[ncore:neleca,:ncore]
    zvec[nocc:,neleca:nocc] = Imat[nocc:,neleca:nocc] / -ee[nocc:,neleca:nocc]
    zvec[neleca:nocc,nocc:] = Imat[neleca:nocc,nocc:] / -ee[neleca:nocc,nocc:]

    # Right-hand side of the CPHF equation for the virtual-occupied block.
    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    vhf = mc._scf.get_veff(mol, zvec_ao) * 2
    xvo = reduce(numpy.dot, (orbv.T, vhf, orbo))
    xvo += Imat[neleca:,:neleca] - Imat[:neleca,neleca:].T
    def fvind(x):
        # Response of the HF potential to a virtual-occupied rotation x.
        x = x.reshape(xvo.shape)
        dm = reduce(numpy.dot, (orbv, x, orbo.T))
        v = mc._scf.get_veff(mol, dm + dm.T)
        v = reduce(numpy.dot, (orbv.T, v, orbo))
        return v * 2
    dm1resp = cphf.solve(fvind, mo_energy, mc._scf.mo_occ, xvo, max_cycle=30)[0]
    zvec[neleca:,:neleca] = dm1resp

    # Orbital-energy-weighted z-vector, transformed to the AO basis.
    zeta = numpy.einsum('ij,j->ij', zvec, mo_energy)
    zeta = reduce(numpy.dot, (mo_coeff, zeta, mo_coeff.T))

    # Rebuild the AO z-vector now that the virtual-occupied block is filled.
    zvec_ao = reduce(numpy.dot, (mo_coeff, zvec+zvec.T, mo_coeff.T))
    # Projector onto the HF-occupied space.  Named distinctly because the
    # atom loop below uses ``p1`` for the upper AO bound of each atom.
    occ_proj = numpy.dot(mo_coeff[:,:neleca], mo_coeff[:,:neleca].T)
    vhf_s1occ = reduce(numpy.dot, (occ_proj, mc._scf.get_veff(mol, zvec_ao), occ_proj))

    # Blocks already accounted for through zvec are removed from Imat, and
    # the virtual-occupied block is replaced by the transposed
    # occupied-virtual block.
    Imat[:ncore,ncore:neleca] = 0
    Imat[ncore:neleca,:ncore] = 0
    Imat[nocc:,neleca:nocc] = 0
    Imat[neleca:nocc,nocc:] = 0
    Imat[neleca:,:neleca] = Imat[:neleca,neleca:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))

    casci_dm1 = dm_core + dm_cas
    hf_dm1 = mc._scf.make_rdm1(mo_coeff, mc._scf.mo_occ)
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Half-transform the active 2-RDM to the AO basis on its last two
    # indices and store it lower-triangular packed; the diagonal is halved
    # to compensate for the symmetrisation casdm2 + casdm2^T.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    # Block size (number of AOs on the second ERI index) from the memory
    # left; clamped to [2, nao] so a non-positive budget still works.
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, casci_dm1)
        de[k] += numpy.einsum('xij,ij->x', h1ao, zvec_ao)

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            # Finish the MO->AO transformation of the active 2-RDM for the
            # AO ranges [p0:p1] (this atom) and [q0:q1] (this shell block).
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2

            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape((p1-p0)*nf,-1))
                eri1tmp = eri1tmp.reshape(p1-p0,nf,nao,nao)
                # Coulomb- and exchange-like couplings between the HF
                # density and the z-vector density.
                de[k,i] -= numpy.einsum('ijkl,ij,kl', eri1tmp, hf_dm1[p0:p1,q0:q1], zvec_ao) * 2
                de[k,i] -= numpy.einsum('ijkl,kl,ij', eri1tmp, hf_dm1, zvec_ao[p0:p1,q0:q1]) * 2
                de[k,i] += numpy.einsum('ijkl,il,kj', eri1tmp, hf_dm1[p0:p1], zvec_ao[q0:q1])
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, hf_dm1[q0:q1], zvec_ao[p0:p1])

                # The four contractions below are the blocked equivalent of:
                #:vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
                #:de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], casci_dm1[p0:p1]) * 2
                #:de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_core[q0:q1], casci_dm1[p0:p1])
                de[k,i] -= numpy.einsum('ijkl,lk,ij', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1]) * 2
                de[k,i] += numpy.einsum('ijkl,jk,il', eri1tmp, dm_cas[q0:q1], dm_core[p0:p1])
            eri1 = eri1tmp = None

        # Overlap-derivative terms; im1 and zeta are contracted on both
        # sides because neither matrix is symmetrised beforehand.
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], im1[:,p0:p1])

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], zeta[:,p0:p1]) * 2

        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ji->x', s1[:,p0:p1], vhf_s1occ[:,p0:p1]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de
Exemplo n.º 7
0
def kernel(mp, t2, atmlst=None, mf_grad=None, verbose=logger.INFO):
    '''Nuclear gradients for an unrestricted MP2 object.

    Args:
        mp: MP2 object with spin-resolved quantities.  This function reads
            ``mol``, ``frozen``, ``mo_coeff``, ``mo_occ``, ``max_memory``,
            ``get_frozen_mask`` and the mean-field object ``_scf`` from it.
        t2: tuple ``(t2aa, t2ab, t2bb)`` of doubles amplitudes.
        atmlst: iterable of atom indices.  Defaults to all atoms of ``mol``.
        mf_grad: mean-field gradient object.  Defaults to
            ``mp._scf.nuc_grad_method()``.
        verbose: verbosity level handed to ``logger.new_logger``.

    Returns:
        ``numpy.ndarray`` of shape (len(atmlst), 3): the gradient including
        the nuclear-repulsion term for the atoms in ``atmlst``.
    '''
    if mf_grad is None: mf_grad = mp._scf.nuc_grad_method()

    log = logger.new_logger(mp, verbose)
    # time.clock() was removed in Python 3.8; use the logger's clock helpers
    # so the reference times match what log.timer* measures against.
    time0 = logger.process_clock(), logger.perf_counter()

    log.debug('Build ump2 rdm1 intermediates')
    d1 = ump2._gamma1_intermediates(mp, t2)
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ump2 rdm2 intermediates')

    mol = mp.mol
    # "No frozen orbitals" is signalled by None or by the integer 0.  Compare
    # by value: identity against an int literal is implementation-dependent.
    frozen = mp.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and frozen == 0))
    moidx = mp.get_frozen_mask()
    # Index sets: occupied-active, virtual-active, occupied-frozen,
    # virtual-frozen, for each spin.
    OA_a, VA_a, OF_a, VF_a = mp2_grad._index_frozen_active(moidx[0], mp.mo_occ[0])
    OA_b, VA_b, OF_b, VF_b = mp2_grad._index_frozen_active(moidx[1], mp.mo_occ[1])
    orboa = mp.mo_coeff[0][:,OA_a]
    orbva = mp.mo_coeff[0][:,VA_a]
    orbob = mp.mo_coeff[1][:,OA_b]
    orbvb = mp.mo_coeff[1][:,VA_b]
    nao, nocca = orboa.shape
    nvira = orbva.shape[1]
    noccb = orbob.shape[1]
    nvirb = orbvb.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    t2aa, t2ab, t2bb = t2
    part_dm2aa = _ao2mo.nr_e2(t2aa.reshape(nocca**2,nvira**2),
                              numpy.asarray(orbva.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(nocca,nocca,nao,nao)
    part_dm2bb = _ao2mo.nr_e2(t2bb.reshape(noccb**2,nvirb**2),
                              numpy.asarray(orbvb.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(noccb,noccb,nao,nao)
    part_dm2ab = lib.einsum('ijab,pa,qb->ipqj', t2ab, orbva, orbvb)
    # Antisymmetrise the same-spin blocks over the two AO indices.
    part_dm2aa = (part_dm2aa.transpose(0,2,3,1) -
                  part_dm2aa.transpose(0,3,2,1)) * .5
    part_dm2bb = (part_dm2bb.transpose(0,2,3,1) -
                  part_dm2bb.transpose(0,3,2,1)) * .5

    hf_dm1a, hf_dm1b = mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)
    hf_dm1 = hf_dm1a + hf_dm1b

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements in lower-triangular packed storage.
    diagidx = numpy.arange(nao)
    diagidx = diagidx*(diagidx+1)//2 + diagidx
    de = numpy.zeros((len(atmlst),3))
    Imata = numpy.zeros((nao,nao))
    Imatb = numpy.zeros((nao,nao))
    # Per-atom HF-like derivative potentials are kept in a temporary HDF5 file.
    fdm2 = lib.H5TmpFile()
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst),2,3,nao,nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory*.9e6/8/(nao**3*2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((2,3,nao,nao))
        for b0, b1, nf in mp2_grad._shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            # Alpha 2-particle density for AO rows [ip0:ip1]: same-spin part
            # followed by the alpha-beta part.
            dm2bufa = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2aa)
            dm2bufa+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2aa[:,ip0:ip1])
            dm2bufa = lib.einsum('pqrj,sj->pqrs', dm2bufa, orboa)
            tmp = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2ab)
            tmp+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2ab[:,ip0:ip1])
            dm2bufa+= lib.einsum('pqrj,sj->pqrs', tmp, orbob)
            tmp = None
            dm2bufa = dm2bufa + dm2bufa.transpose(0,1,3,2)
            dm2bufa = lib.pack_tril(dm2bufa.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            # Halve the packed diagonal, which the symmetrisation doubled.
            dm2bufa[:,:,diagidx] *= .5

            # Beta 2-particle density for the same AO rows.
            dm2bufb = lib.einsum('pi,iqrj->pqrj', orbob[ip0:ip1], part_dm2bb)
            dm2bufb+= lib.einsum('qi,iprj->pqrj', orbob, part_dm2bb[:,ip0:ip1])
            dm2bufb = lib.einsum('pqrj,sj->pqrs', dm2bufb, orbob)
            tmp = lib.einsum('iqrj,sj->iqrs', part_dm2ab, orbob[ip0:ip1])
            tmp+= lib.einsum('iqrj,sj->iqsr', part_dm2ab[:,:,ip0:ip1], orbob)
            dm2bufb+= lib.einsum('pi,iqrs->srpq', orboa, tmp)
            tmp = None
            dm2bufb = dm2bufb + dm2bufb.transpose(0,1,3,2)
            dm2bufb = lib.pack_tril(dm2bufb.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            dm2bufb[:,:,diagidx] *= .5

            shls_slice = (b0,b1,0,mol.nbas,0,mol.nbas,0,mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imata += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufa)
            Imatb += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufb)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,nf,nao,-1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufa) * 2
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufb) * 2
            dm2bufa = dm2bufb = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf*nao,-1))
                eri1tmp = eri1tmp.reshape(nf,nao,nao,nao)
                vhf[:,i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[0,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1a[ip0:ip1])
                vhf[1,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1b[ip0:ip1])
                vhf[:,i,ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[0,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1a)
                vhf[1,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1b)
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in uccsd_grad module.
    mo_a, mo_b = mp.mo_coeff
    mo_ea, mo_eb = mp._scf.mo_energy
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nocca = numpy.count_nonzero(mp.mo_occ[0] > 0)
    noccb = numpy.count_nonzero(mp.mo_occ[1] > 0)
    s0 = mp._scf.get_ovlp()
    Imata = reduce(numpy.dot, (mo_a.T, Imata, s0, mo_a)) * -1
    Imatb = reduce(numpy.dot, (mo_b.T, Imatb, s0, mo_b)) * -1

    dm1a = numpy.zeros((nmoa,nmoa))
    dm1b = numpy.zeros((nmob,nmob))
    doo, dOO = d1[0]
    dvv, dVV = d1[1]
    if with_frozen:
        # Frozen-active blocks come from Imat divided by orbital-energy
        # differences; active-active blocks are the symmetrised intermediates.
        dco = Imata[OF_a[:,None],OA_a] / (mo_ea[OF_a,None] - mo_ea[OA_a])
        dfv = Imata[VF_a[:,None],VA_a] / (mo_ea[VF_a,None] - mo_ea[VA_a])
        dm1a[OA_a[:,None],OA_a] = (doo + doo.T) * .5
        dm1a[OF_a[:,None],OA_a] = dco
        dm1a[OA_a[:,None],OF_a] = dco.T
        dm1a[VA_a[:,None],VA_a] = (dvv + dvv.T) * .5
        dm1a[VF_a[:,None],VA_a] = dfv
        dm1a[VA_a[:,None],VF_a] = dfv.T
        dco = Imatb[OF_b[:,None],OA_b] / (mo_eb[OF_b,None] - mo_eb[OA_b])
        dfv = Imatb[VF_b[:,None],VA_b] / (mo_eb[VF_b,None] - mo_eb[VA_b])
        dm1b[OA_b[:,None],OA_b] = (dOO + dOO.T) * .5
        dm1b[OF_b[:,None],OA_b] = dco
        dm1b[OA_b[:,None],OF_b] = dco.T
        dm1b[VA_b[:,None],VA_b] = (dVV + dVV.T) * .5
        dm1b[VF_b[:,None],VA_b] = dfv
        dm1b[VA_b[:,None],VF_b] = dfv.T
    else:
        dm1a[:nocca,:nocca] = (doo + doo.T) * .5
        dm1a[nocca:,nocca:] = (dvv + dvv.T) * .5
        dm1b[:noccb,:noccb] = (dOO + dOO.T) * .5
        dm1b[noccb:,noccb:] = (dVV + dVV.T) * .5

    # Right-hand side of the orbital-response equations.
    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf = mp._scf.get_veff(mp.mol, dm1)
    Xvo = reduce(numpy.dot, (mo_a[:,nocca:].T, vhf[0], mo_a[:,:nocca]))
    XVO = reduce(numpy.dot, (mo_b[:,noccb:].T, vhf[1], mo_b[:,:noccb]))
    Xvo+= Imata[:nocca,nocca:].T - Imata[nocca:,:nocca]
    XVO+= Imatb[:noccb,noccb:].T - Imatb[noccb:,:noccb]

    dm1_resp = _response_dm1(mp, (Xvo,XVO))
    dm1a += dm1_resp[0]
    dm1b += dm1_resp[1]
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    # Replace the virtual-occupied block by the transposed occupied-virtual
    # block before going back to the AO basis.
    Imata[nocca:,:nocca] = Imata[:nocca,nocca:].T
    Imatb[noccb:,:noccb] = Imatb[:noccb,noccb:].T
    im1 = reduce(numpy.dot, (mo_a, Imata, mo_a.T))
    im1+= reduce(numpy.dot, (mo_b, Imatb, mo_b.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    # Energy weights: the average of the two orbital energies, except that
    # the occupied-virtual blocks use the occupied orbital energy alone.
    zeta = (mo_ea[:,None] + mo_ea) * .5
    zeta[nocca:,:nocca] = mo_ea[:nocca]
    zeta[:nocca,nocca:] = mo_ea[:nocca].reshape(-1,1)
    zeta_a = reduce(numpy.dot, (mo_a, zeta*dm1a, mo_a.T))
    zeta = (mo_eb[:,None] + mo_eb) * .5
    zeta[noccb:,:noccb] = mo_eb[:noccb]
    zeta[:noccb,noccb:] = mo_eb[:noccb].reshape(-1,1)
    zeta_b = reduce(numpy.dot, (mo_b, zeta*dm1b, mo_b.T))

    # AO densities now including the response contribution.
    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf_s1occ = mp._scf.get_veff(mol, (dm1[0]+dm1[0].T, dm1[1]+dm1[1].T))
    p1a = numpy.dot(mo_a[:,:nocca], mo_a[:,:nocca].T)
    p1b = numpy.dot(mo_b[:,:noccb], mo_b[:,:noccb].T)
    vhf_s1occ = (reduce(numpy.dot, (p1a, vhf_s1occ[0], p1a)) +
                 reduce(numpy.dot, (p1b, vhf_s1occ[1], p1b))) * .5
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1pa = hf_dm1a + dm1[0]*2
    dm1pb = hf_dm1b + dm1[1]*2
    dm1 = dm1[0] + dm1[1] + hf_dm1
    zeta_a += rhf_grad.make_rdm1e(mo_ea, mo_a, mp.mo_occ[0])
    zeta_b += rhf_grad.make_rdm1e(mo_eb, mo_b, mp.mo_occ[1])
    zeta = zeta_a + zeta_b

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
        de[k] -= numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,0], dm1pa)
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,1], dm1pb)

    # Restrict the nuclear term to atmlst so its shape matches ``de`` when
    # only a subset of atoms is requested.
    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mp.__class__.__name__, *time0)
    return de
Exemplo n.º 8
0
def Lci_dot_dgci_dx(Lci,
                    weights,
                    mc,
                    mo_coeff=None,
                    ci=None,
                    atmlst=None,
                    mf_grad=None,
                    eris=None,
                    verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the CI
    Lagrange term nuclear gradient (sum_IJ Lci_IJ d2_Ecas/d_lambda d_PIJ)
    This involves removing all core-core and nuclear-nuclear terms and making the substitution
    sum_I w_I<L_I|p'q|I> + c.c. -> <0|p'q|0>
    sum_I w_I<L_I|p'r'sq|I> + c.c. -> <0|p'r'sq|0>
    The active-core terms (sum_I w_I<L_I|x'iyi|I>, sum_I w_I <L_I|x'iiy|I>, c.c.) must be retained.

    Args:
        Lci: CI Lagrange-multiplier vector(s), passed as the bra to
            ``mc.fcisolver.trans_rdm12``.
        weights: state weights.  Not referenced in this function body.
            NOTE(review): presumably already folded into ``Lci`` or the
            fcisolver -- confirm with the caller.
        mc: CASSCF-type object; ``mc.frozen`` must be None.
        mo_coeff: MO coefficients (nao, nmo).  Defaults to ``mc.mo_coeff``.
        ci: CI vector(s).  Defaults to ``mc.ci``.
        atmlst: iterable of atom indices.  Defaults to all atoms.
        mf_grad: mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        eris: integral object exposing ``ppaa``.  Built with
            ``mc.ao2mo(mo_coeff)`` when omitted.
        verbose: not used by this function.

    Returns:
        ``numpy.ndarray`` of shape (len(atmlst), 3): sum of the hcore,
        renormalisation and ERI components.  No nuclear-repulsion term.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # eris defaults to None in the signature but is dereferenced below;
    # build it on demand instead of failing on None.ppaa.
    if eris is None:
        eris = mc.ao2mo(mo_coeff)

    t0 = (logger.process_clock(), logger.perf_counter())
    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: TDMs + c.c. instead of RDMs; 06/30/2020: new interface in mcscf.addons makes this much more transparent
    casdm1, casdm2 = mc.fcisolver.trans_rdm12(Lci, ci, ncas, nelecas)
    casdm1 += casdm1.transpose(1, 0)
    casdm2 += casdm2.transpose(1, 0, 3, 2)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # Gather (active, active | general, active) integrals from eris.ppaa.
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        aapa[:, :, i, :] = eris.ppaa[i][ncore:nocc, :, :].transpose(1, 2, 0)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # MRH: delete h1 + vhf_c from the first line below (core and core-core stuff)
    # Also extend gfock to span the whole space
    # gfock lives in the MO basis, so size it (nmo, nmo) rather than like the
    # AO-basis dm_cas; the two only coincide when nmo == nao.
    gfock = np.zeros((nmo, nmo), dtype=dm_cas.dtype)
    gfock[:, :nocc] = reduce(np.dot, (mo_coeff.T, vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(np.dot,
                                  (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += np.einsum('uvpw,vuwt->pt', aapa, casdm2)
    # Symmetrised generalized Fock matrix in the AO basis, contracted with
    # the overlap derivative below.
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas))
    vhf1c, vhf1a = vj - vk * 0.5
    #vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Half-transform the active 2-body density to the AO basis on its last
    # two indices and store it lower-triangular packed; the diagonal is
    # halved to compensate for the symmetrisation.
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays of this size are required!
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lci_dot_dgci memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lci_dot_dgci 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: dm1 -> dm_cas in the line below
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm_cas)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            # Free whatever the previous block left behind before allocating
            # the next derivative-integral buffer.
            gc.collect()
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lci_dot_dgci atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: dm1 -> dm_cas in the line below. Also eliminate core-core terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm_cas[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                               dm_core[p0:p1]) * 2

    logger.debug(mc, "CI lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "CI lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "CI lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri
    return de
Exemplo n.º 9
0
def Lorb_dot_dgorb_dx(Lorb,
                      mc,
                      mo_coeff=None,
                      ci=None,
                      atmlst=None,
                      mf_grad=None,
                      eris=None,
                      verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the orbital
    Lagrange term nuclear gradient (sum_pq Lorb_pq d2_Ecas/d_lambda d_kpq)
    This involves removing nuclear-nuclear terms and making the substitution
    (D_[p]q + D_p[q]) -> D_pq
    (d_[p]qrs + d_pq[r]s + d_p[q]rs + d_pqr[s]) -> d_pqrs
    Where [] around an index implies contraction with Lorb from the left, so that the external index
    (regardless of whether the index on the rdm is bra or ket) is always the first index of Lorb.

    Args:
        Lorb: orbital Lagrange multipliers in the MO basis; right-multiplied
            onto ``mo_coeff``, so its leading dimension is nmo.
        mc: CASSCF-type object; ``mc.frozen`` must be None.
        mo_coeff: MO coefficients (nao, nmo).  Defaults to ``mc.mo_coeff``.
        ci: CI vector(s).  Defaults to ``mc.ci``.
        atmlst: iterable of atom indices.  Defaults to all atoms.
        mf_grad: mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        eris: integral object exposing ``ppaa`` and ``papa``.  Built with
            ``mc.ao2mo(mo_coeff)`` when omitted.
        verbose: not used by this function.

    Returns:
        ``numpy.ndarray`` of shape (len(atmlst), 3): sum of the hcore,
        renormalisation and ERI components.  No nuclear-repulsion term.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''

    # dmo = smoT.dao.smo
    # dao = mo.dmo.moT
    t0 = (logger.process_clock(), logger.perf_counter())

    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # eris defaults to None in the signature but is dereferenced below;
    # build it on demand instead of failing on None.ppaa.
    if eris is None:
        eris = mc.ao2mo(mo_coeff)

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: new 'effective' MO coefficients including contraction from the Lagrange multipliers
    moL_coeff = np.dot(mo_coeff, Lorb)
    # C C^T, used below to map a product of AO matrices back to an
    # AO-basis density-like quantity.
    s0_inv = np.dot(mo_coeff, mo_coeff.T)
    moL_core = moL_coeff[:, :ncore]
    moL_cas = moL_coeff[:, ncore:nocc]

    # MRH: these SHOULD be state-averaged! Use the actual sacasscf object!
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    # MRH: each index exactly once!
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: new density matrix terms
    dmL_core = np.dot(moL_core, mo_core.T) * 2
    dmL_cas = reduce(np.dot, (moL_cas, casdm1, mo_cas.T))
    dmL_core += dmL_core.T
    dmL_cas += dmL_cas.T
    dm1 = dm_core + dm_cas
    dm1L = dmL_core + dmL_cas
    # MRH: end new density matrix terms
    # MRH: wrap the integral instead of the density matrix. I THINK the sign is the same!
    # mo sets 0 and 2 should be transposed, 1 and 3 should be not transposed; this will lead to correct sign
    # Except I can't do this for the external index, because the external index is contracted to ovlp matrix,
    # not the 2RDM
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    aapaL = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        jbuf = eris.ppaa[i]
        kbuf = eris.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :].transpose(1, 2, 0)
        # Contract the general index of each integral slice with the active
        # columns of Lorb.
        aapaL[:, :, i, :] += np.tensordot(jbuf,
                                          Lorb[:, ncore:nocc],
                                          axes=((0), (0)))
        kbuf = np.tensordot(kbuf, Lorb[:, ncore:nocc],
                            axes=((1), (0))).transpose(1, 2, 0)
        aapaL[:, :, i, :] += kbuf + kbuf.transpose(1, 0, 2)
    # MRH: new vhf terms
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    vjL, vkL = mc._scf.get_jk(mol, (dmL_core, dmL_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    vhfL_c = vjL[0] - vkL[0] * .5
    vhfL_a = vjL[1] - vkL[1] * .5
    # MRH: I rewrote this Feff calculation completely, double-check it
    gfock = np.dot(h1, dm1L)  # h1e
    # core-core and active-core, 2nd 1RDM linked
    gfock += np.dot((vhf_c + vhf_a), dmL_core)
    # core-core and active-core, 1st 1RDM linked
    gfock += np.dot((vhfL_c + vhfL_a), dm_core)
    gfock += np.dot(vhfL_c, dm_cas)  # core-active, 1st 1RDM linked
    gfock += np.dot(vhf_c, dmL_cas)  # core-active, 2nd 1RDM linked
    # Definition of quantity is in MO's; going (AO->MO->AO) incurs an inverse ovlp
    gfock = np.dot(s0_inv, gfock)
    # active-active
    gfock += reduce(np.dot, (mo_coeff, np.einsum(
        'uviw,uvtw->it', aapaL, casdm2), mo_cas.T))
    # MRH: I have to contract this external 2RDM index explicitly on the 2RDM but fortunately I can do so here
    gfock += reduce(
        np.dot,
        (mo_coeff, np.einsum('uviw,vuwt->it', aapa, casdm2), moL_cas.T))
    # MRH: As of 04/18/2019, the two-body part of this is including aapaL is definitely, unambiguously correct
    # This transpose is for the overlap matrix later on
    dme0 = (gfock + gfock.T) / 2
    aapa = vj = vk = vhf_c = vhf_a = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    vhf1c, vhf1a, vhf1cL, vhf1aL = vj - vk * 0.5
    #vhf1c, vhf1a, vhf1cL, vhf1aL = mf_grad.get_veff(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # MRH: contract the final two indices of the active-active 2RDM with L as you change to AOs
    # note tensordot always puts indices in the order of the arguments.
    dm2Lbuf = np.zeros((ncas**2, nmo, nmo))
    # MRH: The second line below transposes the L; the third line transposes the derivative later on
    # Both the L and the derivative have to explore all indices
    dm2Lbuf[:, :, ncore:nocc] = np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 2)).transpose(1, 2, 0, 3).reshape(ncas**2, nmo, ncas)
    dm2Lbuf[:, ncore:nocc, :] += np.tensordot(
        Lorb[:, ncore:nocc], casdm2,
        axes=(1, 3)).transpose(1, 2, 3, 0).reshape(ncas**2, ncas, nmo)
    dm2Lbuf += dm2Lbuf.transpose(0, 2, 1)
    dm2Lbuf = np.ascontiguousarray(dm2Lbuf)
    dm2Lbuf = ao2mo._ao2mo.nr_e2(dm2Lbuf.reshape(ncas**2, nmo**2), mo_coeff.T,
                                 (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # Pack both half-transformed densities lower-triangular and halve the
    # diagonal to compensate for the symmetrisation above.
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    dm2Lbuf = lib.pack_tril(dm2Lbuf)
    dm2Lbuf[:, diag_idx] *= .5
    dm2Lbuf = dm2Lbuf.reshape(ncas, ncas, nao_pair)

    if atmlst is None:
        atmlst = list(range(mol.natm))
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # MRH: 3 components of eri array and 1 density matrix array: FOUR arrays of this size are required!
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lorb_dot_dgorb memory remaining for eri manipulation: {} MB; using blocksize = {}'
        .format(max_memory, blksize))
    t0 = logger.timer(mc, 'SA-CASSCF Lorb_dot_dgorb 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: h1e and Feff terms
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1L)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2Lbuf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            # MRH: now contract the first two indices of the active-active 2RDM with L as you go from MOs to AOs
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, moL_cas[p0:p1],
                                 mo_cas[q0:q1])
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                 moL_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            # Free whatever the previous block left behind before allocating
            # the next derivative-integral buffer.
            gc.collect()
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            # MRH: I still don't understand why there is a minus here!
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            gc.collect()
            t0 = logger.timer(
                mc, 'SA-CASSCF Lorb_dot_dgorb atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: core-core and core-active 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1L[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1cL[:, p0:p1], dm1[p0:p1]) * 2
        # MRH: active-core 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                               dmL_core[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1aL[:, p0:p1],
                               dm_core[p0:p1]) * 2

    # MRH: deleted the nuclear-nuclear part to avoid double-counting
    # lesson learned from debugging - mol.intor computes -1 * the derivative and only
    # for one index
    # on the other hand, mf_grad.hcore_generator computes the actual derivative of
    # h1 for both indices and with the correct sign

    logger.debug(mc, "Orb lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "Orb lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "Orb lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri

    return de
Exemplo n.º 10
0
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None,
           verbose=None):
    '''Nuclear gradients of a CAS wavefunction with explicit orbital response.

    The gradient is assembled in two stages:

    1. Derivative integrals (core Hamiltonian, two-electron integrals and
       the mean-field potentials from ``mf_grad.get_veff``) are contracted
       with the one- and two-particle density matrices.
    2. For every atom and Cartesian component a first-order MO rotation
       ``c1`` is built (its virtual-occupied block comes from solving linear
       equations with the matrix returned by the nested ``hess``) and
       contracted with the MO-basis Hamiltonian and density matrices.

    The nuclear repulsion gradient is added at the end.

    NOTE: this routine builds the full ``(3, nao, nao, nao, nao)`` derivative
    integral tensor and a ``(nmo,)*4`` density matrix in memory, and it reads
    ``mc._scf._eri``, so it is only usable for small in-core problems.

    Args:
        mc: CAS object providing ``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``max_memory``, ``get_hcore`` and ``_scf``.
        mo_coeff: MO coefficients, shape (nao, nmo).  Defaults to
            ``mc._scf.mo_coeff``.
        ci: CI vector passed to ``mc.fcisolver.make_rdm12``.  Defaults to
            ``mc.ci``.
        atmlst: Ignored.  The response loop below always runs over every
            atom of ``mol``, so gradients are returned for all atoms.
        mf_grad: Mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: Unused.

    Returns:
        numpy array of shape (mol.natm, 3).
    '''
    if mo_coeff is None: mo_coeff = mc._scf.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao+1) // 2
    mo_energy = mc._scf.mo_energy

    hcore_deriv = mf_grad.hcore_generator(mol)
    # Only referenced by the disabled renormalization term in the atom loop;
    # s1 is rebuilt from raw integrals before the response part.
    s1 = mf_grad.get_ovlp(mol)
    mo_occ = mo_coeff[:,:nocc]
    mo_core = mo_coeff[:,:ncore]
    mo_cas = mo_coeff[:,ncore:nocc]

    # Use the CI vector selected above so that a caller-supplied ``ci`` is
    # honoured (the argument used to be ignored in favour of ``mc.ci``).
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    # NOTE: dme0 built from gfock is currently unused because the
    # renormalization term in the atom loop is commented out.
    dm_core = numpy.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(numpy.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_occ, mo_cas), compact=False)
    aapa = aapa.reshape(ncas,ncas,nocc,ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    gfock = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c + vhf_a, mo_occ)) * 2
    gfock[:,ncore:nocc] = reduce(numpy.dot, (mo_occ.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:,ncore:nocc] += numpy.einsum('uviw,vuwt->it', aapa, casdm2)
    dme0 = reduce(numpy.dot, (mo_occ, (gfock+gfock.T)*.5, mo_occ.T))
    # Release the large intermediates
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))

    # Half-transform the symmetrized active 2-RDM to packed AO pairs; the
    # diagonal of the packed pair index is halved to avoid double counting
    # when contracting with 's2kl' integrals.
    diag_idx = numpy.arange(nao)
    diag_idx = diag_idx * (diag_idx+1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0,1,3,2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2,ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2,nao,nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:,diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas,ncas,nao_pair)
    #casdm2 = casdm2_cc = None

    # All atoms are always processed: the response loop further down
    # enumerates mol.offset_nr_by_atom() and indexes de by atom number.
    atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst),3))

    # Number of AO functions per integral batch that fits in memory
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory*.9e6/8 / ((aoslices[:,3]-aoslices[:,2]).max()*nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        #de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1], mo_cas[q0:q1])
            shls_slice = (shl0,shl1,b0,b1,0,mol.nbas,0,mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,p1-p0,nf,nao_pair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:,p0:p1], dm1[p0:p1]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:,p0:p1], dm_core[p0:p1]) * 2

    # Full MO-basis 2-particle density matrix: core-core, core-active and
    # active-active blocks.
    dm2 = numpy.zeros((nmo,nmo,nmo,nmo))
    for i in range(ncore):
        for j in range(ncore):
            dm2[i,i,j,j] += 4
            dm2[i,j,j,i] -= 2
        dm2[i,i,ncore:nocc,ncore:nocc] = casdm1 * 2
        dm2[ncore:nocc,ncore:nocc,i,i] = casdm1 * 2
        dm2[i,ncore:nocc,ncore:nocc,i] =-casdm1
        dm2[ncore:nocc,i,i,ncore:nocc] =-casdm1
    dm2[ncore:nocc,ncore:nocc,ncore:nocc,ncore:nocc] = casdm2
    eri0 = ao2mo.restore(1, ao2mo.full(mc._scf._eri, mo_coeff), nmo)
    Imat = numpy.einsum('pjkl,qjkl->pq', eri0, dm2)

    # Full MO-basis 1-particle density matrix (rebinds dm1, which held the
    # AO-basis density above).
    dm1 = numpy.zeros((nmo,nmo))
    for i in range(ncore):
        dm1[i,i] = 2
    dm1[ncore:nocc,ncore:nocc] = casdm1

    neleca, nelecb = mol.nelec

    # Raw derivative integrals; the leading minus converts the 'ip'
    # integrals into nuclear-coordinate derivatives.
    h1 =-(mol.intor('int1e_ipkin', comp=3)
         +mol.intor('int1e_ipnuc', comp=3))
    s1 =-mol.intor('int1e_ipovlp', comp=3)
    # Transform the last three AO indices of the derivative ERIs to the MO
    # basis; the differentiated (first) index is transformed per atom below.
    eri1 = mol.intor('int2e_ip1', comp=3).reshape(3,nao,nao,nao,nao)
    eri1 = numpy.einsum('xipkl,pj->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijpl,pk->xijkl', eri1, mo_coeff)
    eri1 = numpy.einsum('xijkp,pl->xijkl', eri1, mo_coeff)
    h0 = reduce(numpy.dot, (mo_coeff.T, mc._scf.get_hcore(), mo_coeff))
    g0 = ao2mo.restore(1, ao2mo.full(mol, mo_coeff), nmo)

    def hess():
        # Matrix of the linear equations solved for the virtual-occupied
        # block of c1, flattened to (nocc*nvir, nocc*nvir) and negated.
        nocc = mol.nelectron//2
        nvir = nmo - nocc
        eri_mo = g0
        eai = lib.direct_sum('a-i->ai', mo_energy[nocc:], mo_energy[:nocc])
        h = eri_mo[nocc:,:nocc,nocc:,:nocc] * 4
        h-= numpy.einsum('cdlk->ckdl', eri_mo[nocc:,nocc:,:nocc,:nocc])
        h-= numpy.einsum('cldk->ckdl', eri_mo[nocc:,:nocc,nocc:,:nocc])
        for a in range(nvir):
            for i in range(nocc):
                h[a,i,a,i] += eai[a,i]
        return -h.reshape(nocc*nvir,-1)
    hh = hess()
    # Orbital energy differences e_p - e_q
    ee = mo_energy[:,None] - mo_energy

    for k,(sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()):
        mol.set_rinv_origin(mol.atom_coord(k))
        vrinv = -mol.atom_charge(k) * mol.intor('int1e_iprinv', comp=3)

        # 2e AO integrals dot 2pdm
        for i in range(3):
            # First-order two-electron integrals in the MO basis,
            # symmetrized over all four indices.
            g1 = numpy.einsum('pjkl,pi->ijkl', eri1[i,p0:p1], mo_coeff[p0:p1])
            g1 = g1 + g1.transpose(1,0,2,3)
            g1 = g1 + g1.transpose(2,3,0,1)
            g1 *= -1
            # First-order core Hamiltonian and overlap in the MO basis
            hx =(numpy.einsum('pq,pi,qj->ij', h1[i,p0:p1], mo_coeff[p0:p1], mo_coeff)
               + reduce(numpy.dot, (mo_coeff.T, vrinv[i], mo_coeff)))
            hx = hx + hx.T
            sx = numpy.einsum('pq,pi,qj->ij', s1[i,p0:p1], mo_coeff[p0:p1], mo_coeff)
            sx = sx + sx.T

            # Occupied-occupied, virtual-virtual and virtual-occupied blocks
            # of the first-order Fock-like matrix (neleca = number of
            # occupied mean-field orbitals).
            fij =(hx[:neleca,:neleca]
                  - numpy.einsum('ij,j->ij', sx[:neleca,:neleca], mo_energy[:neleca])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[:neleca,:neleca,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[:neleca,:neleca,:neleca,:neleca])
                  + numpy.einsum('ijkk->ij', g1[:neleca,:neleca,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[:neleca,:neleca,:neleca,:neleca]))

            fab =(hx[neleca:,neleca:]
                  - numpy.einsum('ij,j->ij', sx[neleca:,neleca:], mo_energy[neleca:])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[neleca:,neleca:,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,neleca:])
                  + numpy.einsum('ijkk->ij', g1[neleca:,neleca:,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[neleca:,:neleca,:neleca,neleca:]))

            fai =(hx[neleca:,:neleca]
                  - numpy.einsum('ai,i->ai', sx[neleca:,:neleca], mo_energy[:neleca])
                  - numpy.einsum('kl,ijlk->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,:neleca]) * 2
                  + numpy.einsum('kl,iklj->ij', sx[:neleca,:neleca],
                                 g0[neleca:,:neleca,:neleca,:neleca])
                  + numpy.einsum('ijkk->ij', g1[neleca:,:neleca,:neleca,:neleca]) * 2
                  - numpy.einsum('ikkj->ij', g1[neleca:,:neleca,:neleca,:neleca]))
            # First-order MO rotation c1: diagonal blocks from the overlap
            # derivative, virtual-occupied block from the linear solve.
            c1 = numpy.zeros((nmo,nmo))
            c1[:neleca,:neleca] = -.5 * sx[:neleca,:neleca]
            c1[neleca:,neleca:] = -.5 * sx[neleca:,neleca:]
            cvo1 = numpy.linalg.solve(hh, fai.ravel()).reshape(-1,neleca)
            cov1 = -(sx[neleca:,:neleca] + cvo1).T
            c1[neleca:,:neleca] = cvo1
            c1[:neleca,neleca:] = cov1
            # Response of the two-electron potential to cvo1
            v1 = numpy.einsum('pqai,ai->pq', g0[:,:,neleca:,:neleca], cvo1) * 4
            v1-= numpy.einsum('paiq,ai->pq', g0[:,neleca:,:neleca,:], cvo1)
            v1-= numpy.einsum('piaq,ai->pq', g0[:,:neleca,neleca:,:], cvo1)
            fij += v1[:neleca,:neleca]
            fab += v1[neleca:,neleca:]
            # Core-active and active-external rotations inside the
            # occupied/virtual spaces, from energy denominators
            c1[:ncore,ncore:neleca] = -fij[:ncore,ncore:] / ee[:ncore,ncore:neleca]
            c1[ncore:neleca,:ncore] = -fij[ncore:,:ncore] / ee[ncore:neleca,:ncore]
            m = nocc - neleca
            c1[nocc:,neleca:nocc] = -fab[m:,:m] / ee[nocc:,neleca:nocc]
            c1[neleca:nocc,nocc:] = -fab[:m,m:] / ee[neleca:nocc,nocc:]
            # Contract the rotated Hamiltonian with the density matrices
            h0c1 = h0.dot(c1)
            h0c1 = h0c1 + h0c1.T
            g0c1 = numpy.einsum('pjkl,pi->ijkl', g0, c1)
            g0c1 = g0c1 + g0c1.transpose(1,0,2,3)
            g0c1 = g0c1 + g0c1.transpose(2,3,0,1)

            de[k,i] += numpy.einsum('ij,ji', h0c1, dm1)
            de[k,i] += numpy.einsum('ijkl,jilk', g0c1, dm2)*.5

    de += rhf_grad.grad_nuc(mol)
    return de
Exemplo n.º 11
0
def Lci_dot_dgci_dx(Lci,
                    weights,
                    mc,
                    mo_coeff=None,
                    ci=None,
                    atmlst=None,
                    mf_grad=None,
                    verbose=None):
    ''' Modification of pyscf.grad.casscf.kernel to compute instead the CI
    Lagrange term nuclear gradient (sum_IJ Lci_IJ d2_Ecas/d_lambda d_PIJ)
    This involves removing all core-core and nuclear-nuclear terms and making the substitution
    sum_I w_I<L_I|p'q|I> + c.c. -> <0|p'q|0>
    sum_I w_I<L_I|p'r'sq|I> + c.c. -> <0|p'r'sq|0>
    The active-core terms (sum_I w_I<L_I|x'iyi|I>, sum_I w_I <L_I|x'iiy|I>, c.c.) must be retained.

    Args:
        Lci: Sequence of CI Lagrange-multiplier vectors, one per root.
        weights: Sequence of per-root weights applied to the transition
            density matrices.
        mc: CAS object providing ``mol``, ``ncore``, ``ncas``, ``nelecas``,
            ``fcisolver``, ``frozen``, ``max_memory``, ``get_hcore`` and
            ``_scf``.
        mo_coeff: MO coefficients, shape (nao, nmo).  Defaults to
            ``mc.mo_coeff``.
        ci: Sequence of CI vectors, one per root.  Defaults to ``mc.ci``.
        atmlst: Atom indices to differentiate.  Defaults to all atoms.
        mf_grad: Mean-field gradient object.  Defaults to
            ``mc._scf.nuc_grad_method()``.
        verbose: Unused.

    Returns:
        numpy array of shape (len(atmlst), 3): the sum of the hcore,
        renormalization and ERI components.

    Raises:
        NotImplementedError: if ``mc.frozen`` is not None.
    '''
    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2
    # len() works for both a stacked array and a list of per-root vectors
    nroots = len(ci)
    weights = np.asarray(weights)

    mo_occ = mo_coeff[:, :nocc]
    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: TDMs + c.c. instead of RDMs
    casdm1 = np.zeros((nroots, ncas, ncas))
    casdm2 = np.zeros((nroots, ncas, ncas, ncas, ncas))
    for iroot in range(nroots):
        casdm1[iroot], casdm2[iroot] = mc.fcisolver.trans_rdm12(
            Lci[iroot], ci[iroot], ncas, nelecas)
    # Weighted sum over roots, then add the complex-conjugate (transposed)
    # contribution.
    casdm1 = (casdm1 * weights[:, None, None]).sum(0)
    casdm2 = (casdm2 * weights[:, None, None, None, None]).sum(0)
    casdm1 += casdm1.transpose(1, 0)
    casdm2 += casdm2.transpose(1, 0, 3, 2)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    aapa = ao2mo.kernel(mol, (mo_cas, mo_cas, mo_coeff, mo_cas), compact=False)
    aapa = aapa.reshape(ncas, ncas, nmo, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    # MRH: delete h1 + vhf_c from the first line below (core and core-core stuff)
    # Also extend gfock to span the whole space
    # gfock lives in the MO basis, so it must be (nmo, nmo); sizing it like
    # the AO-basis dm_cas only works when nmo == nao.
    gfock = np.zeros((nmo, nmo), dtype=dm_cas.dtype)
    gfock[:, :nocc] = reduce(np.dot, (mo_coeff.T, vhf_a, mo_occ)) * 2
    gfock[:, ncore:nocc] = reduce(np.dot,
                                  (mo_coeff.T, h1 + vhf_c, mo_cas, casdm1))
    gfock[:, ncore:nocc] += np.einsum('uvpw,vuwt->pt', aapa, casdm2)
    dme0 = reduce(np.dot, (mo_coeff, (gfock + gfock.T) * .5, mo_coeff.T))
    # Release the large intermediates
    aapa = vj = vk = vhf_c = vhf_a = h1 = gfock = None

    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Half-transform the symmetrized active 2-body density to packed AO
    # pairs; the diagonal of the packed pair index is halved to avoid double
    # counting when contracting with 's2kl' integrals.
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    casdm2 = casdm2_cc = None

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    # Number of AO functions per integral batch that fits in memory
    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  ((aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: dm1 -> dm_cas in the line below
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm_cas)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
        # MRH: dm1 -> dm_cas in the line below. Also eliminate core-core terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm_cas[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                               dm_core[p0:p1]) * 2

    lib.logger.debug(mc, "CI lagrange hcore component:\n{}".format(de_hcore))
    lib.logger.debug(mc, "CI lagrange renorm component:\n{}".format(de_renorm))
    lib.logger.debug(mc, "CI lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri
    return de
Exemplo n.º 12
0
def kernel(mp, t2, atmlst=None, mf_grad=None, verbose=logger.INFO):
    '''Nuclear gradients for unrestricted MP2.

    Args:
        mp: UMP2 object providing ``mol``, ``mo_coeff``, ``mo_occ``,
            ``frozen``, ``max_memory``, ``get_frozen_mask`` and ``_scf``.
        t2: Tuple ``(t2aa, t2ab, t2bb)`` of doubles amplitudes.
        atmlst: Atom indices to differentiate.  Defaults to all atoms.
        mf_grad: Mean-field gradient object.  Defaults to
            ``mp._scf.nuc_grad_method()``.
        verbose: Verbosity level handed to ``logger.new_logger``.

    Returns:
        numpy array of shape (len(atmlst), 3), including the nuclear
        repulsion contribution.
    '''
    if mf_grad is None: mf_grad = mp._scf.nuc_grad_method()

    log = logger.new_logger(mp, verbose)
    # time.clock() was removed in Python 3.8; use the logger's own clocks so
    # the start stamps match what the timer methods subtract from.
    time0 = logger.process_clock(), logger.perf_counter()

    log.debug('Build ump2 rdm1 intermediates')
    d1 = ump2._gamma1_intermediates(mp, t2)
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ump2 rdm2 intermediates')

    mol = mp.mol
    # Compare integers by value: "is 0" tests object identity, which is not
    # guaranteed for equal integers and misses numpy integer zeros.
    frozen = mp.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    moidx = mp.get_frozen_mask()
    OA_a, VA_a, OF_a, VF_a = mp2_grad._index_frozen_active(moidx[0], mp.mo_occ[0])
    OA_b, VA_b, OF_b, VF_b = mp2_grad._index_frozen_active(moidx[1], mp.mo_occ[1])
    orboa = mp.mo_coeff[0][:,OA_a]
    orbva = mp.mo_coeff[0][:,VA_a]
    orbob = mp.mo_coeff[1][:,OA_b]
    orbvb = mp.mo_coeff[1][:,VA_b]
    nao, nocca = orboa.shape
    nvira = orbva.shape[1]
    noccb = orbob.shape[1]
    nvirb = orbvb.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    t2aa, t2ab, t2bb = t2
    part_dm2aa = _ao2mo.nr_e2(t2aa.reshape(nocca**2,nvira**2),
                              numpy.asarray(orbva.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(nocca,nocca,nao,nao)
    part_dm2bb = _ao2mo.nr_e2(t2bb.reshape(noccb**2,nvirb**2),
                              numpy.asarray(orbvb.T, order='F'), (0,nao,0,nao),
                              's1', 's1').reshape(noccb,noccb,nao,nao)
    part_dm2ab = lib.einsum('ijab,pa,qb->ipqj', t2ab, orbva, orbvb)
    part_dm2aa = (part_dm2aa.transpose(0,2,3,1) -
                  part_dm2aa.transpose(0,3,2,1)) * .5
    part_dm2bb = (part_dm2bb.transpose(0,2,3,1) -
                  part_dm2bb.transpose(0,3,2,1)) * .5

    hf_dm1a, hf_dm1b = mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)
    hf_dm1 = hf_dm1a + hf_dm1b

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements in the packed lower-triangular index
    diagidx = numpy.arange(nao)
    diagidx = diagidx*(diagidx+1)//2 + diagidx
    de = numpy.zeros((len(atmlst),3))
    Imata = numpy.zeros((nao,nao))
    Imatb = numpy.zeros((nao,nao))
    # Per-atom HF potential derivatives are kept on disk
    fdm2 = lib.H5TmpFile()
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst),2,3,nao,nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory*.9e6/8/(nao**3*2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((2,3,nao,nao))
        for b0, b1, nf in mp2_grad._shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            # Alpha-spin 2-body density block for AO rows ip0:ip1
            dm2bufa = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2aa)
            dm2bufa+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2aa[:,ip0:ip1])
            dm2bufa = lib.einsum('pqrj,sj->pqrs', dm2bufa, orboa)
            tmp = lib.einsum('pi,iqrj->pqrj', orboa[ip0:ip1], part_dm2ab)
            tmp+= lib.einsum('qi,iprj->pqrj', orboa, part_dm2ab[:,ip0:ip1])
            dm2bufa+= lib.einsum('pqrj,sj->pqrs', tmp, orbob)
            tmp = None
            dm2bufa = dm2bufa + dm2bufa.transpose(0,1,3,2)
            dm2bufa = lib.pack_tril(dm2bufa.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            dm2bufa[:,:,diagidx] *= .5

            # Beta-spin 2-body density block for AO rows ip0:ip1
            dm2bufb = lib.einsum('pi,iqrj->pqrj', orbob[ip0:ip1], part_dm2bb)
            dm2bufb+= lib.einsum('qi,iprj->pqrj', orbob, part_dm2bb[:,ip0:ip1])
            dm2bufb = lib.einsum('pqrj,sj->pqrs', dm2bufb, orbob)
            tmp = lib.einsum('iqrj,sj->iqrs', part_dm2ab, orbob[ip0:ip1])
            tmp+= lib.einsum('iqrj,sj->iqsr', part_dm2ab[:,:,ip0:ip1], orbob)
            dm2bufb+= lib.einsum('pi,iqrs->srpq', orboa, tmp)
            tmp = None
            dm2bufb = dm2bufb + dm2bufb.transpose(0,1,3,2)
            dm2bufb = lib.pack_tril(dm2bufb.reshape(-1,nao,nao)).reshape(nf,nao,-1)
            dm2bufb[:,:,diagidx] *= .5

            shls_slice = (b0,b1,0,mol.nbas,0,mol.nbas,0,mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imata += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufa)
            Imatb += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2bufb)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,nf,nao,-1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufa) * 2
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2bufb) * 2
            dm2bufa = dm2bufb = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf*nao,-1))
                eri1tmp = eri1tmp.reshape(nf,nao,nao,nao)
                vhf[:,i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[0,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1a[ip0:ip1])
                vhf[1,i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1b[ip0:ip1])
                vhf[:,i,ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[0,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1a)
                vhf[1,i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1b)
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in uccsd_grad module.
    mo_a, mo_b = mp.mo_coeff
    mo_ea, mo_eb = mp._scf.mo_energy
    nao, nmoa = mo_a.shape
    nmob = mo_b.shape[1]
    nocca = numpy.count_nonzero(mp.mo_occ[0] > 0)
    noccb = numpy.count_nonzero(mp.mo_occ[1] > 0)
    s0 = mp._scf.get_ovlp()
    Imata = reduce(numpy.dot, (mo_a.T, Imata, s0, mo_a)) * -1
    Imatb = reduce(numpy.dot, (mo_b.T, Imatb, s0, mo_b)) * -1

    dm1a = numpy.zeros((nmoa,nmoa))
    dm1b = numpy.zeros((nmob,nmob))
    doo, dOO = d1[0]
    dvv, dVV = d1[1]
    if with_frozen:
        # Frozen-active blocks come from Imat divided by orbital energy
        # differences; active-active blocks are the symmetrized intermediates.
        dco = Imata[OF_a[:,None],OA_a] / (mo_ea[OF_a,None] - mo_ea[OA_a])
        dfv = Imata[VF_a[:,None],VA_a] / (mo_ea[VF_a,None] - mo_ea[VA_a])
        dm1a[OA_a[:,None],OA_a] = (doo + doo.T) * .5
        dm1a[OF_a[:,None],OA_a] = dco
        dm1a[OA_a[:,None],OF_a] = dco.T
        dm1a[VA_a[:,None],VA_a] = (dvv + dvv.T) * .5
        dm1a[VF_a[:,None],VA_a] = dfv
        dm1a[VA_a[:,None],VF_a] = dfv.T
        dco = Imatb[OF_b[:,None],OA_b] / (mo_eb[OF_b,None] - mo_eb[OA_b])
        dfv = Imatb[VF_b[:,None],VA_b] / (mo_eb[VF_b,None] - mo_eb[VA_b])
        dm1b[OA_b[:,None],OA_b] = (dOO + dOO.T) * .5
        dm1b[OF_b[:,None],OA_b] = dco
        dm1b[OA_b[:,None],OF_b] = dco.T
        dm1b[VA_b[:,None],VA_b] = (dVV + dVV.T) * .5
        dm1b[VF_b[:,None],VA_b] = dfv
        dm1b[VA_b[:,None],VF_b] = dfv.T
    else:
        dm1a[:nocca,:nocca] = (doo + doo.T) * .5
        dm1a[nocca:,nocca:] = (dvv + dvv.T) * .5
        dm1b[:noccb,:noccb] = (dOO + dOO.T) * .5
        dm1b[noccb:,noccb:] = (dVV + dVV.T) * .5

    # Right-hand side (virtual-occupied block) for the response equations
    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf = mp._scf.get_veff(mp.mol, dm1)
    Xvo = reduce(numpy.dot, (mo_a[:,nocca:].T, vhf[0], mo_a[:,:nocca]))
    XVO = reduce(numpy.dot, (mo_b[:,noccb:].T, vhf[1], mo_b[:,:noccb]))
    Xvo+= Imata[:nocca,nocca:].T - Imata[nocca:,:nocca]
    XVO+= Imatb[:noccb,noccb:].T - Imatb[noccb:,:noccb]

    dm1_resp = _response_dm1(mp, (Xvo,XVO))
    dm1a += dm1_resp[0]
    dm1b += dm1_resp[1]
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imata[nocca:,:nocca] = Imata[:nocca,nocca:].T
    Imatb[noccb:,:noccb] = Imatb[:noccb,noccb:].T
    im1 = reduce(numpy.dot, (mo_a, Imata, mo_a.T))
    im1+= reduce(numpy.dot, (mo_b, Imatb, mo_b.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    # Energy weights: averaged orbital energies, with the occupied energy
    # used in the occupied-virtual blocks.
    zeta = (mo_ea[:,None] + mo_ea) * .5
    zeta[nocca:,:nocca] = mo_ea[:nocca]
    zeta[:nocca,nocca:] = mo_ea[:nocca].reshape(-1,1)
    zeta_a = reduce(numpy.dot, (mo_a, zeta*dm1a, mo_a.T))
    zeta = (mo_eb[:,None] + mo_eb) * .5
    zeta[noccb:,:noccb] = mo_eb[:noccb]
    zeta[:noccb,noccb:] = mo_eb[:noccb].reshape(-1,1)
    zeta_b = reduce(numpy.dot, (mo_b, zeta*dm1b, mo_b.T))

    dm1 = (reduce(numpy.dot, (mo_a, dm1a, mo_a.T)),
           reduce(numpy.dot, (mo_b, dm1b, mo_b.T)))
    vhf_s1occ = mp._scf.get_veff(mol, (dm1[0]+dm1[0].T, dm1[1]+dm1[1].T))
    # Project the potential onto the occupied space of each spin
    p1a = numpy.dot(mo_a[:,:nocca], mo_a[:,:nocca].T)
    p1b = numpy.dot(mo_b[:,:noccb], mo_b[:,:noccb].T)
    vhf_s1occ = (reduce(numpy.dot, (p1a, vhf_s1occ[0], p1a)) +
                 reduce(numpy.dot, (p1b, vhf_s1occ[1], p1b))) * .5
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1pa = hf_dm1a + dm1[0]*2
    dm1pb = hf_dm1b + dm1[1]*2
    dm1 = dm1[0] + dm1[1] + hf_dm1
    zeta_a += rhf_grad.make_rdm1e(mo_ea, mo_a, mp.mo_occ[0])
    zeta_b += rhf_grad.make_rdm1e(mo_eb, mo_b, mp.mo_occ[1])
    zeta = zeta_a + zeta_b

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
        de[k] -= numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,0], dm1pa)
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k,1], dm1pb)

    # Restrict the nuclear term to atmlst so its shape matches de when only
    # a subset of atoms is requested.
    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mp.__class__.__name__, *time0)
    return de
Exemplo n.º 13
0
def kernel(mycc, t1=None, t2=None, l1=None, l2=None, eris=None, atmlst=None,
           mf_grad=None, d1=None, d2=None, verbose=logger.INFO):
    '''Nuclear gradients for CCSD.

    Args:
        mycc: CCSD object providing ``mol``, ``mo_coeff``, ``mo_occ``,
            ``frozen``, ``max_memory``, ``get_frozen_mask``, ``_scf`` and the
            converged ``t1``, ``t2``, ``l1``, ``l2``.
        t1, t2: Amplitudes.  Default to ``mycc.t1`` / ``mycc.t2``.
        l1, l2: Lambda amplitudes.  Default to ``mycc.l1`` / ``mycc.l2``.
        eris: Optional integrals object.  When given, its ``fock`` matrix is
            checked for canonical orbitals, and it is forwarded to
            ``_response_dm1``.
        atmlst: Atom indices to differentiate.  Defaults to all atoms.
        mf_grad: Mean-field gradient object.  Defaults to
            ``mycc._scf.nuc_grad_method()``.
        d1: Optional precomputed 1-RDM intermediates ``(doo, dov, dvo, dvv)``.
        d2: Optional precomputed 2-RDM intermediates.
        verbose: Verbosity level handed to ``logger.new_logger``.

    Returns:
        numpy array of shape (len(atmlst), 3), including the nuclear
        repulsion contribution.

    Raises:
        RuntimeError: if ``eris.fock`` has an off-diagonal element larger
            than 1e-3 in magnitude (non-canonical HF orbitals).
    '''
    if eris is not None:
        if abs(eris.fock - numpy.diag(eris.fock.diagonal())).max() > 1e-3:
            raise RuntimeError('CCSD gradients does not support NHF (non-canonical HF)')

    if t1 is None: t1 = mycc.t1
    if t2 is None: t2 = mycc.t2
    if l1 is None: l1 = mycc.l1
    if l2 is None: l2 = mycc.l2
    if mf_grad is None: mf_grad = mycc._scf.nuc_grad_method()

    log = logger.new_logger(mycc, verbose)
    # time.clock() was removed in Python 3.8; use the logger's own clocks so
    # the start stamps match what the timer methods subtract from.
    time0 = logger.process_clock(), logger.perf_counter()

    log.debug('Build ccsd rdm1 intermediates')
    if d1 is None:
        d1 = ccsd_rdm._gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)
    log.debug('Build ccsd rdm2 intermediates')
    fdm2 = lib.H5TmpFile()
    if d2 is None:
        d2 = ccsd_rdm._gamma2_outcore(mycc, t1, t2, l1, l2, fdm2, True)
    time1 = log.timer_debug1('rdm2 intermediates', *time1)

    mol = mycc.mol
    mo_coeff = mycc.mo_coeff
    mo_energy = mycc._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mycc.mo_occ > 0)
    # Compare integers by value: "is 0" tests object identity, which is not
    # guaranteed for equal integers and misses numpy integer zeros.
    frozen = mycc.frozen
    with_frozen = not (frozen is None or
                       (isinstance(frozen, (int, numpy.integer)) and
                        frozen == 0))
    OA, VA, OF, VF = _index_frozen_active(mycc.get_frozen_mask(), mycc.mo_occ)

    log.debug('symmetrized rdm2 and MO->AO transformation')
    # Roughly, dm2*2 is computed in _rdm2_mo2ao
    mo_active = mo_coeff[:,numpy.hstack((OA,VA))]
    _rdm2_mo2ao(mycc, d2, mo_active, fdm2)  # transform the active orbitals
    time1 = log.timer_debug1('MO->AO transformation', *time1)
    hf_dm1 = mycc._scf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)

    if atmlst is None:
        atmlst = range(mol.natm)
    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements in the packed lower-triangular index
    diagidx = numpy.arange(nao)
    diagidx = diagidx*(diagidx+1)//2 + diagidx
    de = numpy.zeros((len(atmlst),3))
    Imat = numpy.zeros((nao,nao))
    # Per-atom HF potential derivatives are kept on disk next to dm2
    vhf1 = fdm2.create_dataset('vhf1', (len(atmlst),3,nao,nao), 'f8')

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mycc.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory*.9e6/8/(nao**3*2.5)))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        ip1 = p0
        vhf = numpy.zeros((3,nao,nao))
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = _load_block_tril(fdm2['dm2'], ip0, ip1, nao)
            # Halve the packed diagonal to avoid double counting against
            # 's2kl' integrals
            dm2buf[:,:,diagidx] *= .5
            shls_slice = (b0,b1,0,mol.nbas,0,mol.nbas,0,mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf,nao,-1), dm2buf)
            eri0 = None

            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice).reshape(3,nf,nao,-1)
            de[k] -= numpy.einsum('xijk,ijk->x', eri1, dm2buf) * 2
            dm2buf = None
            # HF part
            for i in range(3):
                eri1tmp = lib.unpack_tril(eri1[i].reshape(nf*nao,-1))
                eri1tmp = eri1tmp.reshape(nf,nao,nao,nao)
                vhf[i] += numpy.einsum('ijkl,ij->kl', eri1tmp, hf_dm1[ip0:ip1])
                vhf[i] -= numpy.einsum('ijkl,il->kj', eri1tmp, hf_dm1[ip0:ip1]) * .5
                vhf[i,ip0:ip1] += numpy.einsum('ijkl,kl->ij', eri1tmp, hf_dm1)
                vhf[i,ip0:ip1] -= numpy.einsum('ijkl,jk->il', eri1tmp, hf_dm1) * .5
            eri1 = eri1tmp = None
        vhf1[k] = vhf
        log.debug('2e-part grad of atom %d %s = %s', ia, mol.atom_symbol(ia), de[k])
        time1 = log.timer_debug1('2e-part grad of atom %d'%ia, *time1)

    Imat = reduce(numpy.dot, (mo_coeff.T, Imat, mycc._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo,nmo))
    if with_frozen:
        # Frozen-active blocks come from Imat divided by orbital energy
        # differences; active-active blocks are the symmetrized intermediates.
        dco = Imat[OF[:,None],OA] / (mo_energy[OF,None] - mo_energy[OA])
        dfv = Imat[VF[:,None],VA] / (mo_energy[VF,None] - mo_energy[VA])
        dm1mo[OA[:,None],OA] = doo + doo.T
        dm1mo[OF[:,None],OA] = dco
        dm1mo[OA[:,None],OF] = dco.T
        dm1mo[VA[:,None],VA] = dvv + dvv.T
        dm1mo[VF[:,None],VA] = dfv
        dm1mo[VA[:,None],VF] = dfv.T
    else:
        dm1mo[:nocc,:nocc] = doo + doo.T
        dm1mo[nocc:,nocc:] = dvv + dvv.T

    # Right-hand side (virtual-occupied block) for the response equations
    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mycc._scf.get_veff(mycc.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:,nocc:].T, vhf, mo_coeff[:,:nocc]))
    Xvo+= Imat[:nocc,nocc:].T - Imat[nocc:,:nocc]

    dm1mo += _response_dm1(mycc, Xvo, eris)
    time1 = log.timer_debug1('response_rdm1 intermediates', *time1)

    Imat[nocc:,:nocc] = Imat[:nocc,nocc:].T
    im1 = reduce(numpy.dot, (mo_coeff, Imat, mo_coeff.T))
    time1 = log.timer_debug1('response_rdm1', *time1)

    log.debug('h1 and JK1')
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)
    # Energy weights: averaged orbital energies, with the occupied energy
    # used in the occupied-virtual blocks.
    zeta = lib.direct_sum('i+j->ij', mo_energy, mo_energy) * .5
    zeta[nocc:,:nocc] = mo_energy[:nocc]
    zeta[:nocc,nocc:] = mo_energy[:nocc].reshape(-1,1)
    zeta = reduce(numpy.dot, (mo_coeff, zeta*dm1mo, mo_coeff.T))

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    # Projector onto the occupied space (named so it cannot be confused with
    # the AO slice bound p1 used in the atom loops)
    proj_occ = numpy.dot(mo_coeff[:,:nocc], mo_coeff[:,:nocc].T)
    vhf_s1occ = reduce(numpy.dot, (proj_occ,
                                   mycc._scf.get_veff(mol, dm1+dm1.T),
                                   proj_occ))
    time1 = log.timer_debug1('h1 and JK1', *time1)

    # Hartree-Fock part contribution
    dm1p = hf_dm1 + dm1*2
    dm1 += hf_dm1
    zeta += mf_grad.make_rdm1e(mo_energy, mo_coeff, mycc.mo_occ)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # s[1] dot I, note matrix im1 is not hermitian
        de[k] += numpy.einsum('xij,ij->x', s1[:,p0:p1], im1[p0:p1])
        de[k] += numpy.einsum('xji,ij->x', s1[:,p0:p1], im1[:,p0:p1])
        # h[1] \dot DM, contribute to f1
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ji->x', h1ao, dm1)
        # -s[1]*e \dot DM,  contribute to f1
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], zeta[p0:p1]  )
        de[k] -= numpy.einsum('xji,ij->x', s1[:,p0:p1], zeta[:,p0:p1])
        # -vhf[s_ij[1]],  contribute to f1, *2 for s1+s1.T
        de[k] -= numpy.einsum('xij,ij->x', s1[:,p0:p1], vhf_s1occ[p0:p1]) * 2
        de[k] -= numpy.einsum('xij,ij->x', vhf1[k], dm1p)

    de += mf_grad.grad_nuc(mol, atmlst)
    log.timer('%s gradients' % mycc.__class__.__name__, *time0)
    return de
Exemplo n.º 14
0
def Lorb_dot_dgorb_dx(Lorb,
                      mc,
                      mo_coeff=None,
                      ci=None,
                      atmlst=None,
                      mf_grad=None,
                      eris=None,
                      verbose=None):
    ''' Modification of single-state CASSCF electronic energy nuclear gradient to compute instead
    the orbital Lagrange term nuclear gradient:

    sum_pq Lorb_pq d2_Ecas/d_lambda d_kpq

    This involves the effective density matrices
    ~D_pq   = L_pr*D_rq   + L_qr*D_pr
    ~d_pqrs = L_pt*d_tqrs + L_rt*d_pqts + L_qt*d_ptrs + L_st*d_pqrt
    (NB: L_pq = -L_qp)

    Args:
        Lorb : orbital Lagrange multiplier matrix in the MO basis; it is
            right-multiplied onto mo_coeff to build the "L-contracted"
            orbitals.
        mc : CASSCF-type object supplying mol, ncore, ncas, nelecas,
            fcisolver, _scf, get_hcore and max_memory.

    Kwargs:
        mo_coeff : MO coefficients; defaults to mc.mo_coeff.
        ci : CI vector(s) handed to mc.fcisolver.make_rdm12; defaults to
            mc.ci.
        atmlst : atom indices for which the gradient is evaluated; defaults
            to all atoms of mc.mol.
        mf_grad : mean-field gradient object; defaults to
            mc._scf.nuc_grad_method().
        eris : integral container exposing ``ppaa`` and ``papa``, each
            indexable by MO index.  When omitted it is generated with
            mc.ao2mo(mo_coeff).
        verbose : accepted for interface compatibility; not used here.

    Returns:
        ndarray of shape (len(atmlst), 3): sum of the hcore, renormalization
        and ERI contributions.  The nuclear-repulsion term is deliberately
        NOT included (see the note near the end of the function).

    Raises:
        NotImplementedError : if mc.frozen is not None.
    '''

    # dmo = smoT.dao.smo
    # dao = mo.dmo.moT
    t0 = (logger.process_clock(), logger.perf_counter())

    if mo_coeff is None: mo_coeff = mc.mo_coeff
    if ci is None: ci = mc.ci
    if mf_grad is None: mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError
    # The signature advertises eris=None, but the integrals are required
    # below; build them here instead of failing on `None.ppaa`.
    if eris is None:
        eris = mc.ao2mo(mo_coeff)

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, nmo = mo_coeff.shape
    nao_pair = nao * (nao + 1) // 2

    mo_core = mo_coeff[:, :ncore]
    mo_cas = mo_coeff[:, ncore:nocc]

    # MRH: new 'effective' MO coefficients including contraction from the Lagrange multipliers
    moL_coeff = np.dot(mo_coeff, Lorb)
    s0_inv = np.dot(mo_coeff, mo_coeff.T)
    moL_core = moL_coeff[:, :ncore]
    moL_cas = moL_coeff[:, ncore:nocc]

    # MRH: these SHOULD be state-averaged! Use the actual sacasscf object!
    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = np.dot(mo_core, mo_core.T) * 2
    dm_cas = reduce(np.dot, (mo_cas, casdm1, mo_cas.T))
    # MRH: new density matrix terms
    dmL_core = np.dot(moL_core, mo_core.T) * 2
    dmL_cas = reduce(np.dot, (moL_cas, casdm1, mo_cas.T))
    dmL_core += dmL_core.T
    dmL_cas += dmL_cas.T
    dm1 = dm_core + dm_cas
    dm1L = dmL_core + dmL_cas
    # MRH: wrap the integral instead of the density matrix.
    # g_prst*~d_qrst = (g_pust*L_ur + g_prut*L_us + g_prsu*L_ut)*d_qrst + g_prst*L_uq*d_urst
    #                = 'aapaL'_prst*d_qrst        [ERI TERM 1]
    #                = 'aapa'_prst*L_uq*d_urst    [ERI TERM 2]
    aapa = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    aapaL = np.zeros((ncas, ncas, nmo, ncas), dtype=dm_cas.dtype)
    for i in range(nmo):
        jbuf = eris.ppaa[i]
        kbuf = eris.papa[i]
        aapa[:, :, i, :] = jbuf[ncore:nocc, :, :].transpose(1, 2, 0)
        aapaL[:, :, i, :] += np.tensordot(jbuf,
                                          Lorb[:, ncore:nocc],
                                          axes=((0), (0)))
        kbuf = np.tensordot(kbuf, Lorb[:, ncore:nocc],
                            axes=((1), (0))).transpose(1, 2, 0)
        aapaL[:, :, i, :] += kbuf + kbuf.transpose(1, 0, 2)
    # MRH: new vhf terms
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    vjL, vkL = mc._scf.get_jk(mol, (dmL_core, dmL_cas))
    h1 = mc.get_hcore()
    vhf_c = vj[0] - vk[0] * .5
    vhf_a = vj[1] - vk[1] * .5
    vhfL_c = vjL[0] - vkL[0] * .5
    vhfL_a = vjL[1] - vkL[1] * .5
    gfock = np.dot(h1, dm1L)  # h1e
    gfock += np.dot((vhf_c + vhf_a),
                    dmL_core)  # core-core and active-core, 2nd 1RDM linked
    gfock += np.dot((vhfL_c + vhfL_a),
                    dm_core)  # core-core and active-core, 1st 1RDM linked
    gfock += np.dot(vhfL_c, dm_cas)  # core-active, 1st 1RDM linked
    gfock += np.dot(vhf_c, dmL_cas)  # core-active, 2nd 1RDM linked
    gfock = np.dot(
        s0_inv,
        gfock)  # Definition in MO's; going (AO->MO->AO) incurs inverse ovlp
    # [ERI TERM 1]
    gfock += reduce(
        np.dot,
        (mo_coeff, np.einsum('uviw,uvtw->it', aapaL, casdm2), mo_cas.T))
    # [ERI TERM 2]
    gfock += reduce(
        np.dot,
        (mo_coeff, np.einsum('uviw,vuwt->it', aapa, casdm2), moL_cas.T))
    dme0 = (gfock +
            gfock.T) / 2  # This transpose is for the overlap matrix later on
    # Drop references to the large intermediates before the derivative
    # integrals are generated.
    aapa = vj = vk = vhf_c = vhf_a = None

    vj, vk = mf_grad.get_jk(mol, (dm_core, dm_cas, dmL_core, dmL_cas))
    vhf1c, vhf1a, vhf1cL, vhf1aL = vj - vk * 0.5
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements inside a packed lower triangle
    diag_idx = np.arange(nao)
    diag_idx = diag_idx * (diag_idx + 1) // 2 + diag_idx
    casdm2_cc = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2buf = ao2mo._ao2mo.nr_e2(casdm2_cc.reshape(ncas**2, ncas**2), mo_cas.T,
                                (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # MRH: contract the final two indices of the active-active 2RDM with L as you change to AOs
    # note tensordot always puts indices in the order of the arguments.
    dm2Lbuf = np.zeros((ncas**2, nmo, nmo))
    # MRH: The second line below transposes the L; the third line transposes the derivative
    # Both the L and the derivative have to explore all indices
    Lcasdm2 = np.tensordot(Lorb[:, ncore:nocc], casdm2,
                           axes=(1, 2)).transpose(1, 2, 0, 3)
    dm2Lbuf[:, :, ncore:nocc] = Lcasdm2.reshape(ncas**2, nmo, ncas)
    Lcasdm2 = np.tensordot(Lorb[:, ncore:nocc], casdm2,
                           axes=(1, 3)).transpose(1, 2, 3, 0)
    dm2Lbuf[:, ncore:nocc, :] += Lcasdm2.reshape(ncas**2, ncas, nmo)
    Lcasdm2 = None
    dm2Lbuf += dm2Lbuf.transpose(0, 2, 1)
    dm2Lbuf = np.ascontiguousarray(dm2Lbuf)
    dm2Lbuf = ao2mo._ao2mo.nr_e2(dm2Lbuf.reshape(ncas**2, nmo**2), mo_coeff.T,
                                 (0, nao, 0, nao)).reshape(ncas**2, nao, nao)
    # Pack the AO pair index; diagonal entries are halved after packing.
    dm2buf = lib.pack_tril(dm2buf)
    dm2buf[:, diag_idx] *= .5
    dm2buf = dm2buf.reshape(ncas, ncas, nao_pair)
    dm2Lbuf = lib.pack_tril(dm2Lbuf)
    dm2Lbuf[:, diag_idx] *= .5
    dm2Lbuf = dm2Lbuf.reshape(ncas, ncas, nao_pair)

    if atmlst is None:
        atmlst = list(range(mol.natm))
    aoslices = mol.aoslice_by_atom()
    de_hcore = np.zeros((len(atmlst), 3))
    de_renorm = np.zeros((len(atmlst), 3))
    de_eri = np.zeros((len(atmlst), 3))

    max_memory = mc.max_memory - lib.current_memory()[0]
    blksize = int(max_memory * .9e6 / 8 /
                  (4 * (aoslices[:, 3] - aoslices[:, 2]).max() * nao_pair))
    # MRH: 3 components of eri array and 1 density matrix array:
    # FOUR arrays of this size are required!
    blksize = min(nao, max(2, blksize))
    logger.info(
        mc,
        'SA-CASSCF Lorb_dot_dgorb memory remaining for eri manipulation: %f MB; using'
        ' blocksize = %d', max_memory, blksize)
    t0 = logger.timer(mc, 'SA-CASSCF Lorb_dot_dgorb 1-electron part', *t0)

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        h1ao = hcore_deriv(ia)
        # MRH: h1e and Feff terms
        de_hcore[k] += np.einsum('xij,ij->x', h1ao, dm1L)
        de_renorm[k] -= np.einsum('xij,ij->x', s1[:, p0:p1], dme0[p0:p1]) * 2

        # q0:q1 tracks the AO range of the current shell block
        q1 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q0, q1 = q1, q1 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2Lbuf, mo_cas[p0:p1],
                                mo_cas[q0:q1])
            # MRH: contract first two indices of active-active 2RDM with L as you go MOs -> AOs
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, moL_cas[p0:p1],
                                 mo_cas[q0:q1])
            dm2_ao += lib.einsum('ijw,pi,qj->pqw', dm2buf, mo_cas[p0:p1],
                                 moL_cas[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1',
                             comp=3,
                             aosym='s2kl',
                             shls_slice=shls_slice).reshape(
                                 3, p1 - p0, nf, nao_pair)
            # MRH: I still don't understand why there is a minus here!
            de_eri[k] -= np.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = dm2_ao = None
            t0 = logger.timer(
                mc, 'SA-CASSCF Lorb_dot_dgorb atom {} ({},{}|{})'.format(
                    ia, p1 - p0, nf, nao_pair), *t0)
        # MRH: core-core and core-active 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1c[:, p0:p1], dm1L[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1cL[:, p0:p1], dm1[p0:p1]) * 2
        # MRH: active-core 2RDM terms
        de_eri[k] += np.einsum('xij,ij->x', vhf1a[:, p0:p1],
                               dmL_core[p0:p1]) * 2
        de_eri[k] += np.einsum('xij,ij->x', vhf1aL[:, p0:p1],
                               dm_core[p0:p1]) * 2

    # MRH: deleted the nuclear-nuclear part to avoid double-counting
    # lesson learned from debugging - mol.intor computes -1 * the derivative and only
    # for one index
    # on the other hand, mf_grad.hcore_generator computes the actual derivative of
    # h1 for both indices and with the correct sign

    logger.debug(mc, "Orb lagrange hcore component:\n{}".format(de_hcore))
    logger.debug(mc, "Orb lagrange renorm component:\n{}".format(de_renorm))
    logger.debug(mc, "Orb lagrange eri component:\n{}".format(de_eri))
    de = de_hcore + de_renorm + de_eri

    return de
def make_rdm1_with_orbital_response(mp):
    '''Build the AO-basis MP2 one-particle density matrix with orbital response.

    The MO-basis density is assembled from the MP2 gamma1 intermediates
    (doo, dvv), the frozen/active coupling blocks (when orbitals are frozen)
    and the orbital-response contribution returned by _response_dm1.  It is
    then transformed to the AO basis and the SCF density matrix is added.

    Args:
        mp : MP2-type object supplying mol, t2, mo_coeff, mo_occ, frozen,
            max_memory, get_frozen_mask() and the underlying _scf object.

    Returns:
        2D ndarray: the relaxed one-particle density matrix in the AO basis
        (MP2 correction plus the SCF density from mp._scf.make_rdm1).
    '''
    import time
    from pyscf import lib
    from pyscf.grad.mp2 import _response_dm1, _index_frozen_active, _shell_prange
    from pyscf.mp import mp2
    from pyscf.ao2mo import _ao2mo
    log = lib.logger.new_logger(mp)
    # time.clock() was removed in Python 3.8; use the CPU/wall clock pair
    # that replaces it.
    time0 = time.process_time(), time.perf_counter()
    mol = mp.mol

    log.debug('Build mp2 rdm1 intermediates')
    d1 = mp2._gamma1_intermediates(mp, mp.t2)
    doo, dvv = d1
    time1 = log.timer_debug1('rdm1 intermediates', *time0)

    # Compare by value: `is 0` tests object identity with an int literal,
    # which is implementation-dependent and a SyntaxWarning on Python >= 3.8.
    no_frozen = (mp.frozen is None or
                 (isinstance(mp.frozen, (int, numpy.integer)) and
                  mp.frozen == 0))
    with_frozen = not no_frozen
    OA, VA, OF, VF = _index_frozen_active(mp.get_frozen_mask(), mp.mo_occ)
    orbo = mp.mo_coeff[:, OA]
    orbv = mp.mo_coeff[:, VA]
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]

    # Partially transform MP2 density matrix and hold it in memory
    # The rest transformation are applied during the contraction to ERI integrals
    part_dm2 = _ao2mo.nr_e2(mp.t2.reshape(nocc**2, nvir**2),
                            numpy.asarray(orbv.T, order='F'), (0, nao, 0, nao),
                            's1', 's1').reshape(nocc, nocc, nao, nao)
    part_dm2 = (part_dm2.transpose(0, 2, 3, 1) * 4 -
                part_dm2.transpose(0, 3, 2, 1) * 2)

    offsetdic = mol.offset_nr_by_atom()
    # Positions of the diagonal elements inside a packed lower triangle
    diagidx = numpy.arange(nao)
    diagidx = diagidx * (diagidx + 1) // 2 + diagidx
    Imat = numpy.zeros((nao, nao))

    # 2e AO integrals dot 2pdm
    max_memory = max(0, mp.max_memory - lib.current_memory()[0])
    blksize = max(1, int(max_memory * .9e6 / 8 / (nao**3 * 2.5)))

    for ia in range(mol.natm):
        shl0, shl1, p0, p1 = offsetdic[ia]
        # ip0:ip1 tracks the AO range of the current shell block
        ip1 = p0
        for b0, b1, nf in _shell_prange(mol, shl0, shl1, blksize):
            ip0, ip1 = ip1, ip1 + nf
            dm2buf = lib.einsum('pi,iqrj->pqrj', orbo[ip0:ip1], part_dm2)
            dm2buf += lib.einsum('qi,iprj->pqrj', orbo, part_dm2[:, ip0:ip1])
            dm2buf = lib.einsum('pqrj,sj->pqrs', dm2buf, orbo)
            dm2buf = dm2buf + dm2buf.transpose(0, 1, 3, 2)
            dm2buf = lib.pack_tril(dm2buf.reshape(-1, nao,
                                                  nao)).reshape(nf, nao, -1)
            dm2buf[:, :, diagidx] *= .5

            shls_slice = (b0, b1, 0, mol.nbas, 0, mol.nbas, 0, mol.nbas)
            eri0 = mol.intor('int2e', aosym='s2kl', shls_slice=shls_slice)
            Imat += lib.einsum('ipx,iqx->pq', eri0.reshape(nf, nao, -1),
                               dm2buf)
            # Release the block buffers before the next iteration
            eri0 = None
            dm2buf = None
        time1 = log.timer_debug1('2e-part grad of atom %d' % ia, *time1)

    # Recompute nocc, nvir to include the frozen orbitals and make contraction for
    # the 1-particle quantities, see also the kernel function in ccsd_grad module.
    mo_coeff = mp.mo_coeff
    mo_energy = mp._scf.mo_energy
    nao, nmo = mo_coeff.shape
    nocc = numpy.count_nonzero(mp.mo_occ > 0)
    Imat = reduce(numpy.dot,
                  (mo_coeff.T, Imat, mp._scf.get_ovlp(), mo_coeff)) * -1

    dm1mo = numpy.zeros((nmo, nmo))
    if with_frozen:
        # Frozen/active coupling blocks, scaled by orbital-energy differences
        dco = Imat[OF[:, None], OA] / (mo_energy[OF, None] - mo_energy[OA])
        dfv = Imat[VF[:, None], VA] / (mo_energy[VF, None] - mo_energy[VA])
        dm1mo[OA[:, None], OA] = doo + doo.T
        dm1mo[OF[:, None], OA] = dco
        dm1mo[OA[:, None], OF] = dco.T
        dm1mo[VA[:, None], VA] = dvv + dvv.T
        dm1mo[VF[:, None], VA] = dfv
        dm1mo[VA[:, None], VF] = dfv.T
    else:
        dm1mo[:nocc, :nocc] = doo + doo.T
        dm1mo[nocc:, nocc:] = dvv + dvv.T

    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    vhf = mp._scf.get_veff(mp.mol, dm1) * 2
    Xvo = reduce(numpy.dot, (mo_coeff[:, nocc:].T, vhf, mo_coeff[:, :nocc]))
    Xvo += Imat[:nocc, nocc:].T - Imat[nocc:, :nocc]

    dm1mo += _response_dm1(mp, Xvo)

    # Transform to AO basis
    dm1 = reduce(numpy.dot, (mo_coeff, dm1mo, mo_coeff.T))
    dm1 += mp._scf.make_rdm1(mp.mo_coeff, mp.mo_occ)
    return dm1
Exemplo n.º 16
0
def kernel(mc, mo_coeff=None, ci=None, atmlst=None, mf_grad=None,
           verbose=None):
    '''Nuclear gradients for a CASSCF-type object.

    Args:
        mc : CASSCF-type object supplying mol, ncore, ncas, nelecas,
            fcisolver, _scf, get_hcore and max_memory.

    Kwargs:
        mo_coeff : MO coefficients; defaults to mc.mo_coeff.
        ci : CI vector handed to mc.fcisolver.make_rdm12; defaults to mc.ci.
        atmlst : atom indices to differentiate; defaults to every atom.
        mf_grad : mean-field gradient object; defaults to
            mc._scf.nuc_grad_method().
        verbose : accepted for interface compatibility; not used here.

    Returns:
        ndarray of shape (len(atmlst), 3), including the nuclear-repulsion
        contribution from mf_grad.grad_nuc.

    Raises:
        NotImplementedError : if mc.frozen is not None.
    '''
    if mo_coeff is None:
        mo_coeff = mc.mo_coeff
    if ci is None:
        ci = mc.ci
    if mf_grad is None:
        mf_grad = mc._scf.nuc_grad_method()
    if mc.frozen is not None:
        raise NotImplementedError

    mol = mc.mol
    ncore = mc.ncore
    ncas = mc.ncas
    nocc = ncore + ncas
    nelecas = mc.nelecas
    nao, _nmo = mo_coeff.shape
    npair = nao * (nao + 1) // 2
    nact_sq = ncas**2
    act = slice(ncore, nocc)  # active-orbital window within the occupied set

    orb_occ = mo_coeff[:, :nocc]
    orb_core = mo_coeff[:, :ncore]
    orb_act = mo_coeff[:, act]

    casdm1, casdm2 = mc.fcisolver.make_rdm12(ci, ncas, nelecas)

    # gfock = Generalized Fock, Adv. Chem. Phys., 69, 63
    dm_core = numpy.dot(orb_core, orb_core.T) * 2
    dm_cas = numpy.dot(numpy.dot(orb_act, casdm1), orb_act.T)
    eri_aapa = ao2mo.kernel(mol, (orb_act, orb_act, orb_occ, orb_act),
                            compact=False)
    eri_aapa = eri_aapa.reshape(ncas, ncas, nocc, ncas)
    vj, vk = mc._scf.get_jk(mol, (dm_core, dm_cas))
    hcore = mc.get_hcore()
    # Core-only effective one-electron operator, then add the active part
    fock_core = hcore + (vj[0] - vk[0] * .5)
    fock_full = fock_core + (vj[1] - vk[1] * .5)
    gfock = numpy.dot(numpy.dot(orb_occ.T, fock_full), orb_occ) * 2
    # Active columns are overwritten with the casdm1/casdm2-weighted terms
    gfock[:, act] = numpy.dot(
        numpy.dot(numpy.dot(orb_occ.T, fock_core), orb_act), casdm1)
    gfock[:, act] += numpy.einsum('uviw,vuwt->it', eri_aapa, casdm2)
    gfock_sym = (gfock + gfock.T) * .5
    dme0 = numpy.dot(numpy.dot(orb_occ, gfock_sym), orb_occ.T)
    # Free the large intermediates before derivative integrals are built
    del eri_aapa, vj, vk, hcore, fock_core, fock_full, gfock, gfock_sym

    dm1 = dm_core + dm_cas
    vhf1c, vhf1a = mf_grad.get_veff(mol, (dm_core, dm_cas))
    hcore_deriv = mf_grad.hcore_generator(mol)
    s1 = mf_grad.get_ovlp(mol)

    # Positions of the diagonal elements inside a packed lower triangle
    ao_idx = numpy.arange(nao)
    tril_diag = ao_idx * (ao_idx + 1) // 2 + ao_idx
    casdm2_sym = casdm2 + casdm2.transpose(0, 1, 3, 2)
    dm2_half = ao2mo._ao2mo.nr_e2(casdm2_sym.reshape(nact_sq, nact_sq),
                                  orb_act.T, (0, nao, 0, nao))
    dm2_half = lib.pack_tril(dm2_half.reshape(nact_sq, nao, nao))
    dm2_half[:, tril_diag] *= .5
    dm2_half = dm2_half.reshape(ncas, ncas, npair)
    del casdm2, casdm2_sym

    if atmlst is None:
        atmlst = range(mol.natm)
    aoslices = mol.aoslice_by_atom()
    de = numpy.zeros((len(atmlst), 3))

    # Size the shell blocks so one derivative-integral batch fits in the
    # memory that is still available.
    max_memory = mc.max_memory - lib.current_memory()[0]
    widest_atom = (aoslices[:, 3] - aoslices[:, 2]).max()
    blksize = int(max_memory*.9e6/8 / (widest_atom*npair))
    blksize = min(nao, max(2, blksize))

    for k, ia in enumerate(atmlst):
        shl0, shl1, p0, p1 = aoslices[ia]
        atom_rows = slice(p0, p1)

        # One-electron and overlap-derivative terms
        h1ao = hcore_deriv(ia)
        de[k] += numpy.einsum('xij,ij->x', h1ao, dm1)
        de[k] -= numpy.einsum('xij,ij->x', s1[:, atom_rows],
                              dme0[atom_rows]) * 2

        # Active-space 2RDM contracted with derivative ERIs, block by block
        q0 = 0
        for b0, b1, nf in _shell_prange(mol, 0, mol.nbas, blksize):
            q1 = q0 + nf
            dm2_ao = lib.einsum('ijw,pi,qj->pqw', dm2_half,
                                orb_act[atom_rows], orb_act[q0:q1])
            shls_slice = (shl0, shl1, b0, b1, 0, mol.nbas, 0, mol.nbas)
            eri1 = mol.intor('int2e_ip1', comp=3, aosym='s2kl',
                             shls_slice=shls_slice)
            eri1 = eri1.reshape(3, p1-p0, nf, npair)
            de[k] -= numpy.einsum('xijw,ijw->x', eri1, dm2_ao) * 2
            eri1 = None
            q0 = q1

        # Mean-field-like two-electron terms
        de[k] += numpy.einsum('xij,ij->x', vhf1c[:, atom_rows],
                              dm1[atom_rows]) * 2
        de[k] += numpy.einsum('xij,ij->x', vhf1a[:, atom_rows],
                              dm_core[atom_rows]) * 2

    de += mf_grad.grad_nuc(mol, atmlst)
    return de