def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the Ioo, Ivv, Ivo and Xvo intermediates with everything in memory.

    The density-matrix intermediates ``d1``/``d2`` are contracted with the
    two-electron integrals held by ``eris``.

    Args:
        mycc: CCSD object; only used to build ``eris``, ``d1`` or ``d2`` when
            they are not supplied.
        t1, t2: amplitudes; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: lambda amplitudes (only forwarded to the ``d1``/``d2``
            builders).
        eris: integral container exposing ``oooo``, ``ooov``, ``oovv``,
            ``ovov``, ``ovoo``, ``ovvv`` and ``vvvv``.  Built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: tuple ``(doo, dov, dvo, dvv)``; computed with
            ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``; computed with ``ccsd_rdm.gamma2_incore`` when None.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)``.  ``Ioo``, ``Ivv`` and ``Ivo`` are
        negated just before returning and ``Xvo`` includes the (negated)
        ``Ivo``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1, nvir).T,
                   d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4
            - numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
    Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
    tmpooov = _cp(eris_ooov.transpose(0, 1, 3, 2))
    Ioo += lib.dot(tmpooov.reshape(-1, nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
    eris_oooo = tmp = None

    d_ooov = d_ooov + d_ooov.transpose(1, 0, 2, 3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc, -1), d_ooov.reshape(nocc, -1).T)
    Xvo += lib.dot(eris_ovov.reshape(-1, nvir).T,
                   _cp(d_ooov.transpose(0, 2, 3, 1).reshape(nocc, -1)).T)
    d_ooov = None

    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1, 0, 3, 2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                       d_oovv[i].reshape(nocc, -1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
    Ivo += lib.dot(d_oovv.reshape(-1, nvir).T, tmpooov.reshape(-1, nocc))
    d_oovv = _ccsd.precontract(
        d_oovv.reshape(-1, nvir, nvir)).reshape(nocc, nocc, -1)
    eris_ooov = tmpooov = None

    blksize = 4
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov, nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1, nvir).T,
                   _cp(eris.ovoo).reshape(-1, nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))

    nvir_pair = nvir * (nvir + 1) // 2
    bufe_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    # bufc_ovvv deliberately aliases the leading part of bufe_ovvv: inside the
    # loop below c_ovvv is fully consumed before eris_ovvv is unpacked into the
    # same memory.  The view is built through the ndarray constructor because
    # assigning to ``ndarray.data`` is deprecated in NumPy.
    bufc_ovvv = numpy.ndarray((blksize, nvir, nvir_pair), buffer=bufe_ovvv)
    c_vvvo = numpy.empty((nvir_pair, nvir, nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            lib.dot(dovvv[p0 + i].reshape(nvir, -1),
                    eris_oovv[p0 + i].reshape(nocc, -1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1 - p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1, nvir, nvir), out=c_ovvv)
        for i0, i1, in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE // (p1 - p0) + 1):
                c_vvvo[i0:i1, j0:j1, p0:p1] = \
                    c_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[p0 + i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)

        # From here on c_ovvv is dead: its memory is reused for eris_ovvv.
        eris_ovvv = bufe_ovvv[:p1 - p0]
        lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair),
                        out=eris_ovvv.reshape(-1, nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0, 1, 3, 2))
        lib.dot(eris_ovvv.reshape(-1, nvir).T,
                d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        # Reuse the d_ovvo buffer to hold the transposed ovov integrals.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0 + i])
            e_ovvo[i] = eris_ovov[p0 + i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(d_ovvv.reshape(-1, nvir).T,
                e_ovvo[:p1 - p0].reshape(-1, nocc), 1, Ivo, 1)
        lib.dot(eris_ovvv.reshape(-1, nvir).T,
                d_ovvv.reshape(-1, nvir), 1, Ivv, 1)

        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4
            - numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

    d_oovv = d_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize * nvir, nvir, nocc))
    bufe_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    bufd_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of the [p0:p1] slab in lower-triangular pair indexing.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        for i in range(p0, p1):
            # Rows belonging to a diagonal pair (i, i) get half the weight.
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv, out=bufd_vvvv[:off1 - off0])
        eris_vvvv = lib.unpack_tril(eris.vvvv[off0:off1],
                                    out=bufe_vvvv[:off1 - off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvo.reshape(-1, nocc), 1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1 - off0]
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1, in prange(0, nvir, BLKSIZE // nocc + 1):
                eris_vvvo[i0 - off0:i1 - off0, j0:j1, :] = \
                    eris_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        lib.dot(eris_vvvo.reshape(-1, nocc).T,
                d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        lib.dot(d_vvvv.reshape(-1, nvir).T,
                eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def gamma2_incore(mycc, t1, t2, l1, l2):
    """Compute the two-particle density-matrix intermediates in memory.

    Args:
        mycc: not read by this in-core implementation; kept so the signature
            matches the other gamma2 builders.
        t1, t2: amplitudes; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: lambda amplitudes.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        ``dvvvv`` is a ``(nvir_pair, nvir_pair)`` array with
        ``nvir_pair = nvir*(nvir+1)//2`` and ``dvvov`` is the
        ``transpose(2, 3, 0, 1)`` view of ``dovvv``.
    """
    nocc, nvir = t1.shape
    nov = nocc * nvir

    #:theta = make_theta(t2)
    #:mOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2)
    #:mOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2)
    #:mOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
    l2a = numpy.empty((nocc, nvir, nocc, nvir))
    t2a = numpy.empty((nocc, nvir, nocc, nvir))
    for i in range(nocc):
        l2a[i] = l2[i].transpose(2, 0, 1)
        t2a[i] = t2[i].transpose(2, 0, 1)
    mOvOv = lib.dot(t2a.reshape(-1, nov),
                    l2a.reshape(-1, nov).T).reshape(nocc, nvir, nocc, nvir)
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1, 0, 2)
    mOVov = lib.dot(t2a.reshape(-1, nov), l2a.reshape(-1, nov).T,
                    -1).reshape(nocc, nvir, nocc, nvir)
    # theta is built in place inside the t2a buffer.
    theta = t2a
    for i in range(nocc):
        l2a[i] = l2[i].transpose(1, 0, 2)
        theta[i] *= 2
        theta[i] -= t2[i].transpose(2, 0, 1)
    lib.dot(theta.reshape(-1, nov), l2a.reshape(nov, -1).T,
            1, mOVov.reshape(nov, -1), 1)
    theta = l2a = t2a = None
    moo = (numpy.einsum('jdld->jl', mOvOv) * 2
           + numpy.einsum('jdld->jl', mOVov))
    mvv = (numpy.einsum('lbld->bd', mOvOv) * 2
           + numpy.einsum('lbld->bd', mOVov))
    mia = (numpy.einsum('kc,ikac->ia', l1, t2) * 2
           - numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo * .5

    tau = _ccsd.make_tau(t2, t1, t1)
    #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
    goooo = lib.dot(tau.reshape(-1, nvir**2), l2.reshape(-1, nvir**2).T, .5)
    goooo = goooo.reshape(-1, nocc, nocc, nocc)
    doooo = _cp(make_theta(goooo).transpose(0, 2, 1, 3))

    #:gooov -= numpy.einsum('ib,kjab->jkia', l1, tau)
    #:gooov -= numpy.einsum('kjab,ib->jkia', l2, t1)
    #:gooov += numpy.einsum('jkil,la->jkia', goooo, t1*2)
    gooov = lib.dot(_cp(tau.reshape(-1, nvir)), l1.T, -1)
    lib.dot(_cp(l2.reshape(-1, nvir)), t1.T, -1, gooov, 1)
    gooov = gooov.reshape(nocc, nocc, nvir, nocc)
    tmp = numpy.einsum('ji,ka->jkia', moo * -.5, t1)
    tmp += gooov.transpose(1, 0, 3, 2)
    gooov, tmp = tmp, None
    lib.dot(goooo.reshape(-1, nocc), t1, 2, gooov.reshape(-1, nvir), 1)

    goovv = numpy.einsum('ia,jb->ijab', mia, t1)
    for i in range(nocc):
        goovv[i] += .5 * l2[i]
        goovv[i] += .5 * tau[i]
    #:goovv -= numpy.einsum('jk,kiba->jiba', mij, tau)
    lib.dot(mij, tau.reshape(nocc, -1), -1, goovv.reshape(nocc, -1), 1)
    #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2)
    #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
    lib.dot(t2.reshape(-1, nvir), mab, -1, goovv.reshape(-1, nvir), 1)
    lib.dot(tau.reshape(-1, nvir), mvv.T, -.5, goovv.reshape(-1, nvir), 1)
    tau = None

    #:gooov += numpy.einsum('jaic,kc->jkia', mOvOv, t1)
    #:gooov -= numpy.einsum('kaic,jc->jkia', mOVov, t1)
    tmp = lib.dot(mOvOv.reshape(-1, nvir), t1.T).reshape(nocc, -1, nocc, nocc)
    gooov += tmp.transpose(0, 3, 2, 1)
    # The tmp buffer is overwritten (beta=0) and reused for the next product.
    lib.dot(t1, mOVov.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1), 0)
    gooov -= tmp.reshape(nocc, nocc, nvir, nocc).transpose(0, 1, 3, 2)
    dooov = gooov.transpose(0, 2, 1, 3) * 2 - gooov.transpose(1, 2, 0, 3)
    gooov = None
    #:tmp = numpy.einsum('ikac,jc->jaik', l2, t1)
    #:gOvVo -= numpy.einsum('jaik,kb->jabi', tmp, t1)
    #:gOvvO = numpy.einsum('jaki,kb->jabi', tmp, t1) + mOvOv.transpose(0,3,1,2)
    tmp = tmp.reshape(nocc, nocc, nocc, nvir)
    lib.dot(t1, l2.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1))
    gOvVo = numpy.einsum('ia,jb->jabi', l1, t1)
    gOvvO = numpy.empty((nocc, nvir, nvir, nocc))
    for i in range(nocc):
        gOvVo[i] -= lib.dot(
            _cp(tmp[i].transpose(0, 2, 1).reshape(-1, nocc)),
            t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
        gOvVo[i] += mOVov[i].transpose(2, 0, 1)
        gOvvO[i] = lib.dot(
            tmp[i].reshape(nocc, -1).T,
            t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
        gOvvO[i] += mOvOv[i].transpose(2, 0, 1)
    tmp = None

    dovvo = numpy.empty((nocc, nvir, nvir, nocc))
    doovv = numpy.empty((nocc, nocc, nvir, nvir))
    for i in range(nocc):
        tmp = gOvVo[i] * 2 + gOvvO[i]
        dovvo[i] = tmp.transpose(1, 0, 2)
        tmp = gOvvO[i] * -2 - gOvVo[i]
        doovv[i] = tmp.transpose(2, 0, 1)
    gOvvO = gOvVo = None

    tau2 = _ccsd.make_tau(t2, t1, t1)
    #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
    lib.dot(goooo.reshape(nocc * nocc, -1), tau2.reshape(-1, nvir**2),
            1, goovv.reshape(-1, nvir**2), 1)
    tau2 += numpy.einsum('ia,jb->ijab', t1, t1)
    tau2p = tau2.reshape(nocc, nvir, nocc, nvir)
    for i in range(nocc):
        tau2p[i] = tau2[i].transpose(2, 0, 1)
    tau2, tau2p = tau2p.reshape(nov, -1), None
    #:goovv += numpy.einsum('ibld,jlda->ijab', mOvOv, tau2) * .5
    #:goovv -= numpy.einsum('iald,jldb->ijab', mOVov, tau2) * .5
    tmp = lib.dot(mOvOv.reshape(-1, nov), tau2.T,
                  .5).reshape(nocc, nvir, -1, nvir)
    for i in range(nocc):
        tmp[i] = goovv[i].transpose(1, 0, 2) + tmp[i].transpose(2, 1, 0)
    goovv, tmp = tmp, None
    lib.dot(mOVov.reshape(-1, nov), tau2.T, -.5, goovv.reshape(nov, -1), 1)

    #:goovv += numpy.einsum('iald,jlbd->ijab', mOVov*2+mOvOv, t2) * .5
    t2a, tau2 = tau2.reshape(nocc, nvir, nocc, nvir), None
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1, 0, 2)
    tmp = mOVov * 2
    tmp += mOvOv
    lib.dot(tmp.reshape(-1, nov), t2a.reshape(nov, -1),
            .5, goovv.reshape(nov, -1), 1)
    t2a = tmp = None
    for i in range(nocc):
        goovv[i] = goovv[i] * 2 - goovv[i].transpose(2, 1, 0)
    dovov = goovv
    goooo = goovv = None

    #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
    mOVov = lib.transpose(mOVov.reshape(nov, -1))
    gvovv = lib.dot(mOVov.reshape(nocc, -1).T,
                    t1).reshape(nvir, nocc, nvir, nvir)
    mOVov = None
    tmp = numpy.einsum('ja,jb->ab', l1, t1)
    #:gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
    #:gvovv += numpy.einsum('ba,ic->aibc', mvv, t1*.5)
    for i in range(nvir):
        gvovv[i] += numpy.einsum('b,ic->icb', tmp[i], t1)
        gvovv[i] += numpy.einsum('b,ic->icb', mvv[:, i] * .5, t1)
        gvovv[i] = gvovv[i].transpose(0, 2, 1)

    #:gvovv += numpy.einsum('ja,jibc->aibc', l1, t2)
    #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1)
    #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
    mOvOv = lib.transpose(mOvOv.reshape(nov, -1))
    lib.dot(mOvOv.reshape(nocc, -1).T, t1, -1, gvovv.reshape(-1, nvir), 1)
    mOvOv = None
    lib.dot(l1.T, t2.reshape(nocc, -1), 1, gvovv.reshape(nvir, -1), 1)
    lib.dot(t1.T, l2.reshape(nocc, -1), 1, gvovv.reshape(nvir, -1), 1)

    tmp = numpy.empty((nocc, nvir, nvir))
    for i in range(nvir):
        #:gvovv*2 - gvovv.transpose(0,1,3,2)
        gvovv[i] = _ccsd.make_021(gvovv[i], gvovv[i], 2, -1, out=tmp)

    #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2, t2)*.5
    #:jabc = numpy.einsum('ijab,ic->jabc', l2, t1) * .5
    #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
    #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
    tau = _ccsd.make_tau(t2, t1, t1)
    theta = make_theta(tau)
    tau = None
    l2tmp = _ccsd.pack_tril(l2.reshape(-1, nvir, nvir))
    gtmp = lib.dot(l2tmp.T, theta.reshape(nocc**2, -1),
                   .5).reshape(-1, nvir, nvir)
    l2tmp = theta = None
    nvir_pair = nvir * (nvir + 1) // 2
    tmp = numpy.empty((nvir, nvir, nvir))
    tmp1 = numpy.empty((nvir, nvir, nvir))
    tmptril = numpy.empty((nvir, nvir_pair))

    dvvvv = numpy.empty((nvir_pair, nvir_pair))
    dovvv = numpy.empty((nocc, nvir, nvir, nvir))
    # dvvov = (gvovv*2 - gvovv.transpose(0,1,3,2)).transpose(0,2,1,3)
    # dovvv = dvvov.transpose(2,3,0,1)
    p0 = 0  # row offset of pair (i, 0) in lower-triangular pair indexing
    for i in range(nvir):
        tmp[:i + 1] = gtmp[p0:p0 + i + 1]
        for j in range(i + 1, nvir):
            tmp[j] = gtmp[j * (j + 1) // 2 + i].T
        lib.dot(t1, tmp.reshape(nvir, -1), -2, gvovv[i].reshape(nocc, -1), 1)
        dovvv[:, :, i] = gvovv[i].transpose(0, 2, 1)

        #:gvvvv[i] = (tmp*2-tmp.transpose(0,2,1)).transpose(1,0,2)
        #:gvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(1,0,3,2))
        tmp1[:] = tmp.transpose(1, 0, 2)
        _ccsd.precontract(tmp1, diag_fac=2, out=tmptril)
        # Rows p0..p0+i-1 were filled by earlier iterations (the j > i branch
        # below), so the in-place accumulation reads initialised data.
        dvvvv[p0:p0 + i] += tmptril[:i]
        dvvvv[p0:p0 + i] *= .25
        dvvvv[i * (i + 1) // 2 + i] = tmptril[i] * .5
        for j in range(i + 1, nvir):
            dvvvv[j * (j + 1) // 2 + i] = tmptril[j]
        p0 += i + 1
    gtmp = tmp = tmp1 = tmptril = gvovv = None
    dvvov = dovvv.transpose(2, 3, 0, 1)
    return (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)
def gamma2_outcore(mycc, t1, t2, l1, l2, h5fobj):
    """Compute the two-particle density-matrix intermediates block by block.

    Results are written into datasets created on ``h5fobj``; a temporary HDF5
    swap file (under ``lib.param.TMPDIR``) holds the ``mOvOv``/``mOVov``
    intermediates between the three passes.  Block sizes are derived from
    ``mycc.max_memory`` minus ``lib.current_memory()[0]``.

    Args:
        mycc: CCSD object; ``stdout``, ``verbose`` and ``max_memory`` are read.
        t1, t2: amplitudes; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: lambda amplitudes.
        h5fobj: writable h5py file/group.  The datasets ``dovov``, ``dvvvv``,
            ``doooo``, ``doovv``, ``dovvo``, ``dooov`` and the group ``dovvv``
            are created on it, so they must not exist beforehand.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, None, dovvv, dooov)`` of
        h5py objects living in ``h5fobj``; the ``dvvov`` slot is ``None``.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir+1) // 2
    dovov = h5fobj.create_dataset('dovov', (nocc,nvir,nocc,nvir), 'f8')
    dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair,nvir_pair), 'f8')
    doooo = h5fobj.create_dataset('doooo', (nocc,nocc,nocc,nocc), 'f8')
    doovv = h5fobj.create_dataset('doovv', (nocc,nocc,nvir,nvir), 'f8')
    dovvo = h5fobj.create_dataset('dovvo', (nocc,nvir,nvir,nocc), 'f8')
    dooov = h5fobj.create_dataset('dooov', (nocc,nocc,nocc,nvir), 'f8')

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open explicitly for writing: the default mode of h5py.File is read-only
    # in h5py >= 3, which would make the create_dataset calls below fail.
    fswap = h5py.File(_tmpfile.name, 'w')
    mOvOv = fswap.create_dataset('mOvOv', (nocc,nvir,nocc,nvir), 'f8')
    mOVov = fswap.create_dataset('mOVov', (nocc,nvir,nocc,nvir), 'f8')

    moo = numpy.empty((nocc,nocc))
    mvv = numpy.zeros((nvir,nvir))

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc*nvir**2 * 5
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('rdm intermediates pass 1: block size = %d, nocc = %d in %d blocks',
               blksize, nocc, int((nocc+blksize-1)/blksize))
    # time.clock was removed in Python 3.8; keep using it where it exists so
    # the baseline is unchanged there, otherwise fall back to process_time.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time1 = cpu_clock(), time.time()
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        #:theta = make_theta(t2[p0:p1])
        #:pOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2[p0:p1])
        #:pOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2[p0:p1])
        #:pOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
        pOvOv = numpy.empty((nocc,p1-p0,nvir,nvir))
        pOVov = numpy.empty((nocc,p1-p0,nvir,nvir))
        t2a = numpy.empty((p1-p0,nvir,nocc,nvir))
        t2b = numpy.empty((p1-p0,nvir,nocc,nvir))
        theta = make_theta(t2[p0:p1])
        tmp = numpy.empty_like(t2a)
        for i in range(p1-p0):
            t2a[i] = t2[p0+i].transpose(2,0,1)
            t2b[i] = t2[p0+i].transpose(1,0,2)
            tmp[i] = theta[i].transpose(1,0,2)
        t2a = t2a.reshape(-1,nov)
        t2b = t2b.reshape(-1,nov)
        theta, tmp = tmp.reshape(-1,nov), None
        for i in range(nocc):
            pOvOv[i] = lib.dot(t2a, l2[i].reshape(nov,-1)).reshape(-1,nvir,nvir)
            pOVov[i] = lib.dot(t2b, l2[i].reshape(nov,-1), -1).reshape(-1,nvir,nvir)
            pOVov[i] += lib.dot(theta, _cp(l2[i].transpose(0,2,1).reshape(nov,-1))).reshape(-1,nvir,nvir)
        theta = t2a = t2b = None
        mOvOv[p0:p1] = pOvOv.transpose(1,2,0,3)
        mOVov[p0:p1] = pOVov.transpose(1,2,0,3)
        # One sub-dataset per block; pass 3 reads them back through
        # ao2mo.outcore._load_from_h5g.
        fswap['mvOvO/%d'%istep] = pOvOv.transpose(3,1,2,0)
        fswap['mvOVo/%d'%istep] = pOVov.transpose(3,1,2,0)
        moo[p0:p1] =(numpy.einsum('ljdd->jl', pOvOv) * 2
                   + numpy.einsum('ljdd->jl', pOVov))
        mvv +=(numpy.einsum('llbd->bd', pOvOv[p0:p1]) * 2
             + numpy.einsum('llbd->bd', pOVov[p0:p1]))
        pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass1 [%d:%d]'%(p0, p1), *time1)
    mia =(numpy.einsum('kc,ikac->ia', l1, t2) * 2
        - numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo*.5
    gooov = numpy.einsum('ji,ka->jkia', moo*-.5, t1)

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc**3 + nocc**2*nvir + nocc*nvir**2*6
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    log.debug1('rdm intermediates pass 2: block size = %d, nocc = %d in %d blocks',
               blksize, nocc, int((nocc+blksize-1)/blksize))
    for p0, p1 in prange(0, nocc, blksize):
        tau = _ccsd.make_tau(t2[p0:p1], t1[p0:p1], t1)
        #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
        goooo = lib.dot(tau.reshape(-1,nvir**2), l2.reshape(-1,nvir**2).T, .5)
        goooo = goooo.reshape(-1,nocc,nocc,nocc)
        h5fobj['doooo'][p0:p1] = make_theta(goooo).transpose(0,2,1,3)

        #:gooov[p0:p1] -= numpy.einsum('ib,jkba->jkia', l1, tau)
        #:gooov[p0:p1] -= numpy.einsum('jkba,ib->jkia', l2[p0:p1], t1)
        #:gooov[p0:p1] += numpy.einsum('jkil,la->jkia', goooo, t1*2)
        for i in range(p0,p1):
            gooov[i] -= lib.dot(_cp(tau[i-p0].transpose(0,2,1).reshape(-1,nvir)),
                                l1.T).reshape(nocc,nvir,nocc).transpose(0,2,1)
            gooov[i] -= lib.dot(_cp(l2[i].transpose(0,2,1).reshape(-1,nvir)),
                                t1.T).reshape(nocc,nvir,nocc).transpose(0,2,1)
        lib.dot(goooo.reshape(-1,nocc), t1, 2, gooov[p0:p1].reshape(-1,nvir), 1)

        #:goovv -= numpy.einsum('jk,ikab->ijab', mij, tau)
        goovv = numpy.einsum('ia,jb->ijab', mia[p0:p1], t1)
        for i in range(p1-p0):
            lib.dot(mij, tau[i].reshape(nocc,-1), -1, goovv[i].reshape(nocc,-1), 1)
            goovv[i] += .5 * l2[p0+i]
            goovv[i] += .5 * tau[i]
        #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2[p0:p1])
        #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
        lib.dot(t2[p0:p1].reshape(-1,nvir), mab, -1, goovv.reshape(-1,nvir), 1)
        lib.dot(tau.reshape(-1,nvir), mvv.T, -.5, goovv.reshape(-1,nvir), 1)
        tau = None
        #==== mem usage nocc**3 + nocc*nvir**2

        pOvOv = _cp(mOvOv[p0:p1])
        pOVov = _cp(mOVov[p0:p1])
        #:gooov[p0:p1,:] += numpy.einsum('jaic,kc->jkia', pOvOv, t1)
        #:gooov[:,p0:p1] -= numpy.einsum('kaic,jc->jkia', pOVov, t1)
        tmp = lib.dot(pOvOv.reshape(-1,nvir), t1.T).reshape(p1-p0,-1,nocc,nocc)
        gooov[p0:p1,:] += tmp.transpose(0,3,2,1)
        # The tmp buffer is overwritten (beta=0) and reused for the next product.
        lib.dot(t1, pOVov.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1), 0)
        gooov[:,p0:p1] -= tmp.reshape(nocc,p1-p0,nvir,nocc).transpose(0,1,3,2)

        #:tmp = numpy.einsum('ikac,jc->jika', l2, t1[p0:p1])
        #:gOvVo -= numpy.einsum('jika,kb->jabi', tmp, t1)
        #:gOvvO = numpy.einsum('jkia,kb->jabi', tmp, t1) + pOvOv.transpose(0,3,1,2)
        tmp = tmp.reshape(-1,nocc,nocc,nvir)
        lib.dot(t1[p0:p1], l2.reshape(-1,nvir).T, 1, tmp.reshape(p1-p0,-1))
        gOvVo = numpy.einsum('ia,jb->jabi', l1, t1[p0:p1])
        gOvvO = numpy.empty((p1-p0,nvir,nvir,nocc))
        for i in range(p1-p0):
            gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0,2,1).reshape(-1,nocc)),
                                t1).reshape(nocc,nvir,-1).transpose(1,2,0)
            gOvVo[i] += pOVov[i].transpose(2,0,1)
            gOvvO[i] = lib.dot(tmp[i].reshape(nocc,-1).T,
                               t1).reshape(nocc,nvir,-1).transpose(1,2,0)
            gOvvO[i] += pOvOv[i].transpose(2,0,1)
        tmp = None
        #==== mem usage nocc**3 + nocc*nvir**6
        dovvo[p0:p1] = (gOvVo*2 + gOvvO).transpose(0,2,1,3)
        gOvvO *= -2
        gOvvO -= gOvVo
        doovv[p0:p1] = gOvvO.transpose(0,3,1,2)
        gOvvO = gOvVo = None

        for j0, j1 in prange(0, nocc, blksize):
            tau2 = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
            lib.dot(goooo[:,:,j0:j1].copy().reshape((p1-p0)*nocc,-1),
                    tau2.reshape(-1,nvir**2), 1, goovv.reshape(-1,nvir**2), 1)
            tau2 += numpy.einsum('ia,jb->ijab', t1[j0:j1], t1)
            tau2 = _cp(tau2.transpose(0,3,1,2).reshape(-1,nov))
            #:goovv[:,j0:j1] += numpy.einsum('ibld,jlda->ijab', pOvOv, tau2) * .5
            #:goovv[:,j0:j1] -= numpy.einsum('iald,jldb->ijab', pOVov, tau2) * .5
            goovv[:,j0:j1] += lib.dot(pOvOv.reshape(-1,nov), tau2.T,
                                      .5).reshape(p1-p0,nvir,-1,nvir).transpose(0,2,3,1)
            goovv[:,j0:j1] += lib.dot(pOVov.reshape(-1,nov), tau2.T,
                                      -.5).reshape(p1-p0,nvir,-1,nvir).transpose(0,2,1,3)
            tau2 = None
        #==== mem usage nocc**3 + nocc*nvir**2*7
        #:goovv += numpy.einsum('iald,jlbd->ijab', pOVov*2+pOvOv, t2) * .5
        pOVov *= 2
        pOVov += pOvOv
        for j in range(nocc):
            tmp = lib.dot(pOVov.reshape(-1,nov),
                          _cp(t2[j].transpose(0,2,1).reshape(-1,nvir)), .5)
            goovv[:,j] += tmp.reshape(-1,nvir,nvir)
            tmp = None
        dovov[p0:p1] = make_theta(goovv).transpose(0,2,1,3)
        goooo = goovv = pOvOv = pOVov = None
        time1 = log.timer_debug1('rdm intermediates pass2 [%d:%d]'%(p0, p1), *time1)

    h5fobj['dooov'][:] = gooov.transpose(0,2,1,3)*2 - gooov.transpose(1,2,0,3)
    gooov = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nocc**2*nvir*2+nocc*nvir**2*2, nvir**3*2+nocc*nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory*.95e6/8/unit))
    iobuflen = int(256e6/8/blksize)
    # This pass blocks over the virtual index, so report nvir (the original
    # passed nocc for the value labelled "nvir").
    log.debug1('rdm intermediates pass 3: block size = %d, nvir = %d in %d blocks',
               blksize, nvir, int((nvir+blksize-1)/blksize))
    h5fobj.create_group('dovvv')
    for istep, (p0, p1) in enumerate(prange(0, nvir, blksize)):
        pvOvO = numpy.empty((p1-p0,nocc,nvir,nocc))
        pvOVo = numpy.empty((p1-p0,nocc,nvir,nocc))
        ao2mo.outcore._load_from_h5g(fswap['mvOvO'], p0, p1, pvOvO)
        ao2mo.outcore._load_from_h5g(fswap['mvOVo'], p0, p1, pvOVo)
        #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
        #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
        gvovv = lib.dot(pvOVo.reshape(-1,nocc), t1).reshape(-1,nocc,nvir,nvir)
        for i in range(p1-p0):
            gvovv[i] = gvovv[i].transpose(0,2,1)
        lib.dot(pvOvO.reshape(-1,nocc), t1, -1, gvovv.reshape(-1,nvir), 1)
        pvOvO = pvOVo = None
        #==== mem usage nocc**2*nvir*2 + nocc*nvir**2*2

        l2tmp = l2[:,:,p0:p1] * .5
        #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2tmp, t2)
        #:jabc = numpy.einsum('ijab,ic->jabc', l2tmp, t1)
        #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
        gvvvv = lib.dot(l2tmp.reshape(nocc**2,-1).T, t2.reshape(nocc**2,-1))
        jabc = lib.dot(l2tmp.reshape(nocc,-1).T, t1)
        lib.dot(jabc.reshape(nocc,-1).T, t1, 1, gvvvv.reshape(-1,nvir), 1)
        gvvvv = gvvvv.reshape(-1,nvir,nvir,nvir)
        l2tmp = jabc = None

        #:gvovv = numpy.einsum('ja,jibc->aibc', l1[:,p0:p1], t2)
        #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1[:,p0:p1])
        lib.dot(l1[:,p0:p1].copy().T, t2.reshape(nocc,-1), 1,
                gvovv.reshape(p1-p0,-1), 1)
        lib.dot(t1[:,p0:p1].copy().T, l2.reshape(nocc,-1), 1,
                gvovv.reshape(p1-p0,-1), 1)
        tmp = numpy.einsum('ja,jb->ab', l1[:,p0:p1], t1)
        gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
        gvovv += numpy.einsum('ba,ic->aibc', mvv[:,p0:p1]*.5, t1)
        #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
        for j in range(p1-p0):
            lib.dot(t1, gvvvv[j].reshape(nvir,-1), -2,
                    gvovv[j].reshape(nocc,-1), 1)

        # symmetrize dvvvv because it is symmetrized in ccsd_grad and make_rdm2 anyway
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(dvvvv+dvvvv.transpose(1,0,3,2))
        # now dvvvv == dvvvv.transpose(2,3,0,1) == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
        tmp = numpy.empty((nvir,nvir,nvir))
        tmp1 = numpy.empty((nvir,nvir,nvir))
        tmpvvvv = numpy.empty((p1-p0,nvir,nvir_pair))
        for i in range(p1-p0):
            make_theta(gvvvv[i:i+1], out=tmp)
            tmp1[:] = tmp.transpose(1,0,2)
            _ccsd.precontract(tmp1, diag_fac=2, out=tmpvvvv[i])
        # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
        for i in range(p0, p1):
            for j in range(p0, i):
                tmpvvvv[i-p0,j] += tmpvvvv[j-p0,i]
            tmpvvvv[i-p0,i] *= 2
        for i in range(p0, p1):
            off = i * (i+1) // 2
            if p0 > 0:
                # Add the contributions stored by earlier blocks (written by
                # the "range(p1, nvir)" loop of those blocks).
                tmpvvvv[i-p0,:p0] += dvvvv[off:off+p0]
            dvvvv[off:off+i+1] = tmpvvvv[i-p0,:i+1] * .25
        for i in range(p1, nvir):
            off = i * (i+1) // 2
            dvvvv[off+p0:off+p1] = tmpvvvv[:,i]
        tmp = tmp1 = tmpvvvv = None
        #==== mem usage nvir**3 + nocc*nvir**2

        gvvov = make_theta(gvovv).transpose(0,2,1,3)
        ao2mo.outcore._transpose_to_h5g(h5fobj, 'dovvv/%d'%istep,
                                        gvvov.reshape(-1,nov), iobuflen)
        gvvvv = None
        gvovv = None
        time1 = log.timer_debug1('rdm intermediates pass3 [%d:%d]'%(p0, p1), *time1)

    del(fswap['mOvOv'])
    del(fswap['mOVov'])
    del(fswap['mvOvO'])
    del(fswap['mvOVo'])
    fswap.close()
    _tmpfile = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'],
            h5fobj['doovv'], h5fobj['dovvo'], None,
            h5fobj['dovvv'], h5fobj['dooov'])
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Build the Ioo, Ivv, Ivo and Xvo intermediates using on-disk buffers.

    The density-matrix intermediates ``d1``/``d2`` are contracted with the
    two-electron integrals held by ``eris``.  Large ``ovvv``/``vvvv``-type
    pieces are processed in blocks whose size is derived from
    ``mycc.max_memory``; transposed copies are staged in a temporary HDF5
    swap file under ``lib.param.TMPDIR``.

    Args:
        mycc: CCSD object; ``stdout``, ``verbose`` and ``max_memory`` are read.
        t1, t2: amplitudes; ``t1.shape`` defines ``(nocc, nvir)``.
        l1, l2: lambda amplitudes (only forwarded to the ``d1``/``d2``
            builders).
        eris: integral container exposing ``oooo``, ``ooov``, ``oovv``,
            ``ovov``, ``ovoo``, ``ovvv`` and ``vvvv``.  Built with
            ``ccsd._ERIS(mycc)`` when None.
        d1: tuple ``(doo, dov, dvo, dvv)``; computed with
            ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``.  When None it is generated with
            ``ccsd_rdm.gamma2_outcore`` into a temporary HDF5 file that is
            emptied and closed before returning.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)``.  ``Ioo``, ``Ivv`` and ``Ivo`` are
        negated just before returning and ``Xvo`` includes the (negated)
        ``Ivo``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1, nvir).T,
                   d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4
            - numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1, nvir).T,
                   _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Park the transposed density on disk; it is read back block-wise in pass 1.
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T,
                       eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T,
                       d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4
            - numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(eris_ovvv.reshape(-1, nvir).T,
                d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # Pass 2 blocks over the virtual index, so the message reports nvir (the
    # original reported nocc and a block count derived from nocc).
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # Row offsets of the [p0:p1] slab in lower-triangular pair indexing.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        for i in range(p0, p1):
            # Rows belonging to a diagonal pair (i, i) get half the weight.
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvo.reshape(-1, nocc), 1, Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(d_vvvv.reshape(-1, nvir).T,
                eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)
        lib.dot(eris_vvvo.reshape(-1, nocc).T,
                d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        # Drop the block buffers before the next iteration allocates new ones
        # (the original misspelled eris_vvvo as eris_vovv and kept d_vvvo).
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # Snapshot the key list: entries are deleted while looping.
        for key in list(fd2intermediate.keys()):
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def gamma2_incore(mycc, t1, t2, l1, l2):
    """Build the 2-particle density-matrix intermediates fully in memory.

    Args:
        mycc: object providing ``stdout`` and ``verbose`` (used to build the
            logger).
        t1: array of shape (nocc, nvir); ``nocc`` and ``nvir`` are read from
            ``t1.shape``.
        t2: array indexed as (nocc, nocc, nvir, nvir).
        l1: array with the same layout as ``t1``.
        l2: array with the same layout as ``t2``.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.
        ``dvvvv`` has shape (nvir_pair, nvir_pair) with
        ``nvir_pair = nvir*(nvir+1)//2`` (row/column pairs stored in
        lower-triangular packed form); ``dvvov`` is a transposed view of
        ``dovvv``.

    NOTE(review): presumably t1/t2 are the CCSD amplitudes and l1/l2 the
    Lambda amplitudes -- confirm against the callers.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    # The original took an unused timestamp via time.clock(), which no longer
    # exists on Python >= 3.8 and made this function raise there; dropped.

    #:theta = make_theta(t2)
    #:mOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2)
    #:mOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2)
    #:mOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
    l2a = numpy.empty((nocc,nvir,nocc,nvir))
    t2a = numpy.empty((nocc,nvir,nocc,nvir))
    for i in range(nocc):
        l2a[i] = l2[i].transpose(2,0,1)
        t2a[i] = t2[i].transpose(2,0,1)
    mOvOv = lib.dot(t2a.reshape(-1,nov),
                    l2a.reshape(-1,nov).T).reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    mOVov = lib.dot(t2a.reshape(-1,nov),
                    l2a.reshape(-1,nov).T, -1).reshape(nocc,nvir,nocc,nvir)
    # Reuse the t2a buffer to hold theta = 2*t2(transposed) - t2(transposed)
    theta = t2a
    for i in range(nocc):
        l2a[i] = l2[i].transpose(1,0,2)
        theta[i] *= 2
        theta[i] -= t2[i].transpose(2,0,1)
    # Accumulate the theta contribution in place into mOVov
    lib.dot(theta.reshape(-1,nov), l2a.reshape(nov,-1).T,
            1, mOVov.reshape(nov,-1), 1)
    theta = l2a = t2a = None

    moo =(numpy.einsum('jdld->jl', mOvOv) * 2 +
          numpy.einsum('jdld->jl', mOVov))
    mvv =(numpy.einsum('lbld->bd', mOvOv) * 2 +
          numpy.einsum('lbld->bd', mOVov))
    mia =(numpy.einsum('kc,ikac->ia', l1, t2) * 2 -
          numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo*.5

    tau = _ccsd.make_tau(t2, t1, t1)
    #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
    goooo = lib.dot(tau.reshape(-1,nvir**2), l2.reshape(-1,nvir**2).T, .5)
    goooo = goooo.reshape(-1,nocc,nocc,nocc)
    doooo = _cp(make_theta(goooo).transpose(0,2,1,3))

    #:gooov -= numpy.einsum('ib,kjab->jkia', l1, tau)
    #:gooov -= numpy.einsum('kjab,ib->jkia', l2, t1)
    #:gooov += numpy.einsum('jkil,la->jkia', goooo, t1*2)
    gooov = lib.dot(_cp(tau.reshape(-1,nvir)), l1.T, -1)
    lib.dot(_cp(l2.reshape(-1,nvir)), t1.T, -1, gooov, 1)
    gooov = gooov.reshape(nocc,nocc,nvir,nocc)
    tmp = numpy.einsum('ji,ka->jkia', moo*-.5, t1)
    tmp += gooov.transpose(1,0,3,2)
    gooov, tmp = tmp, None
    lib.dot(goooo.reshape(-1,nocc), t1, 2, gooov.reshape(-1,nvir), 1)

    goovv = numpy.einsum('ia,jb->ijab', mia, t1)
    for i in range(nocc):
        goovv[i] += .5 * l2[i]
        goovv[i] += .5 * tau[i]
    #:goovv -= numpy.einsum('jk,kiba->jiba', mij, tau)
    lib.dot(mij, tau.reshape(nocc,-1), -1, goovv.reshape(nocc,-1), 1)
    #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2)
    #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
    lib.dot(t2.reshape(-1,nvir), mab, -1, goovv.reshape(-1,nvir), 1)
    lib.dot(tau.reshape(-1,nvir), mvv.T, -.5, goovv.reshape(-1,nvir), 1)
    tau = None

    #:gooov += numpy.einsum('jaic,kc->jkia', mOvOv, t1)
    #:gooov -= numpy.einsum('kaic,jc->jkia', mOVov, t1)
    tmp = lib.dot(mOvOv.reshape(-1,nvir), t1.T).reshape(nocc,-1,nocc,nocc)
    gooov += tmp.transpose(0,3,2,1)
    # beta=0: overwrite the tmp buffer instead of allocating a new one
    lib.dot(t1, mOVov.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1), 0)
    gooov -= tmp.reshape(nocc,nocc,nvir,nocc).transpose(0,1,3,2)
    dooov = gooov.transpose(0,2,1,3)*2 - gooov.transpose(1,2,0,3)
    gooov = None

    #:tmp = numpy.einsum('ikac,jc->jaik', l2, t1)
    #:gOvVo -= numpy.einsum('jaik,kb->jabi', tmp, t1)
    #:gOvvO = numpy.einsum('jaki,kb->jabi', tmp, t1) + mOvOv.transpose(0,3,1,2)
    tmp = tmp.reshape(nocc,nocc,nocc,nvir)
    lib.dot(t1, l2.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
    gOvVo = numpy.einsum('ia,jb->jabi', l1, t1)
    gOvvO = numpy.empty((nocc,nvir,nvir,nocc))
    for i in range(nocc):
        gOvVo[i] -= lib.dot(_cp(tmp[i].transpose(0,2,1).reshape(-1,nocc)),
                            t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvVo[i] += mOVov[i].transpose(2,0,1)
        gOvvO[i] = lib.dot(tmp[i].reshape(nocc,-1).T,
                           t1).reshape(nocc,nvir,-1).transpose(1,2,0)
        gOvvO[i] += mOvOv[i].transpose(2,0,1)
    tmp = None

    dovvo = numpy.empty((nocc,nvir,nvir,nocc))
    doovv = numpy.empty((nocc,nocc,nvir,nvir))
    for i in range(nocc):
        tmp = gOvVo[i] * 2 + gOvvO[i]
        dovvo[i] = tmp.transpose(1,0,2)
        tmp = gOvvO[i] * -2 - gOvVo[i]
        doovv[i] = tmp.transpose(2,0,1)
    gOvvO = gOvVo = None

    tau2 = _ccsd.make_tau(t2, t1, t1)
    #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
    lib.dot(goooo.reshape(nocc*nocc,-1), tau2.reshape(-1,nvir**2), 1,
            goovv.reshape(-1,nvir**2), 1)
    tau2 += numpy.einsum('ia,jb->ijab', t1, t1)
    # tau2p shares tau2's memory; each slice is re-laid-out in place
    tau2p = tau2.reshape(nocc,nvir,nocc,nvir)
    for i in range(nocc):
        tau2p[i] = tau2[i].transpose(2,0,1)
    tau2, tau2p = tau2p.reshape(nov,-1), None
    #:goovv += numpy.einsum('ibld,jlda->ijab', mOvOv, tau2) * .5
    #:goovv -= numpy.einsum('iald,jldb->ijab', mOVov, tau2) * .5
    tmp = lib.dot(mOvOv.reshape(-1,nov), tau2.T, .5).reshape(nocc,nvir,-1,nvir)
    for i in range(nocc):
        tmp[i] = goovv[i].transpose(1,0,2) + tmp[i].transpose(2,1,0)
    # From here on goovv is stored in (nocc,nvir,nocc,nvir) order
    goovv, tmp = tmp, None
    lib.dot(mOVov.reshape(-1,nov), tau2.T, -.5, goovv.reshape(nov,-1), 1)

    #:goovv += numpy.einsum('iald,jlbd->ijab', mOVov*2+mOvOv, t2) * .5
    t2a, tau2 = tau2.reshape(nocc,nvir,nocc,nvir), None
    for i in range(nocc):
        t2a[i] = t2[i].transpose(1,0,2)
    tmp = mOVov*2
    tmp += mOvOv
    lib.dot(tmp.reshape(-1,nov), t2a.reshape(nov,-1), .5,
            goovv.reshape(nov,-1), 1)
    t2a = tmp = None
    for i in range(nocc):
        goovv[i] = goovv[i] * 2 - goovv[i].transpose(2,1,0)
    dovov = goovv
    goooo = goovv = None

    #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
    mOVov = lib.transpose(mOVov.reshape(nov,-1))
    gvovv = lib.dot(mOVov.reshape(nocc,-1).T, t1).reshape(nvir,nocc,nvir,nvir)
    mOVov = None
    tmp = numpy.einsum('ja,jb->ab', l1, t1)
    #:gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
    #:gvovv += numpy.einsum('ba,ic->aibc', mvv, t1*.5)
    for i in range(nvir):
        gvovv[i] += numpy.einsum('b,ic->icb', tmp[i], t1)
        gvovv[i] += numpy.einsum('b,ic->icb', mvv[:,i]*.5, t1)
        gvovv[i] = gvovv[i].transpose(0,2,1)

    #:gvovv += numpy.einsum('ja,jibc->aibc', l1, t2)
    #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1)
    #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
    mOvOv = lib.transpose(mOvOv.reshape(nov,-1))
    lib.dot(mOvOv.reshape(nocc,-1).T, t1, -1, gvovv.reshape(-1,nvir), 1)
    mOvOv = None
    lib.dot(l1.T, t2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)
    lib.dot(t1.T, l2.reshape(nocc,-1), 1, gvovv.reshape(nvir,-1), 1)

    tmp = numpy.empty((nocc,nvir,nvir))
    for i in range(nvir):
        #:gvovv*2 - gvovv.transpose(0,1,3,2)
        gvovv[i] = _ccsd.make_021(gvovv[i], gvovv[i], 2, -1, out=tmp)

    #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2, t2)*.5
    #:jabc = numpy.einsum('ijab,ic->jabc', l2, t1) * .5
    #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
    #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
    tau = _ccsd.make_tau(t2, t1, t1)
    theta = make_theta(tau)
    tau = None
    l2tmp = lib.pack_tril(l2.reshape(-1,nvir,nvir))
    gtmp = lib.dot(l2tmp.T, theta.reshape(nocc**2,-1), .5).reshape(-1,nvir,nvir)
    l2tmp = theta = None
    nvir_pair = nvir * (nvir+1) //2
    tmp = numpy.empty((nvir,nvir,nvir))
    tmp1 = numpy.empty((nvir,nvir,nvir))
    tmptril = numpy.empty((nvir,nvir_pair))

    dvvvv = numpy.empty((nvir_pair,nvir_pair))
    dovvv = numpy.empty((nocc,nvir,nvir,nvir))
    # dvvov = (gvovv*2 - gvovv.transpose(0,1,3,2)).transpose(0,2,1,3)
    # dovvv = dvvov.transpose(2,3,0,1)
    p0 = 0  # packed-triangular offset of row i, i.e. i*(i+1)//2
    for i in range(nvir):
        # Unpack row i of the tril-packed gtmp into a full (nvir,nvir,nvir) block
        tmp[:i+1] = gtmp[p0:p0+i+1]
        for j in range(i+1, nvir):
            tmp[j] = gtmp[j*(j+1)//2+i].T
        lib.dot(t1, tmp.reshape(nvir,-1), -2, gvovv[i].reshape(nocc,-1), 1)
        dovvv[:,:,i] = gvovv[i].transpose(0,2,1)

        #:gvvvv[i] = (tmp*2-tmp.transpose(0,2,1)).transpose(1,0,2)
        #:gvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(1,0,3,2))
        tmp1[:] = tmp.transpose(1,0,2)
        _ccsd.precontract(tmp1, diag_fac=2, out=tmptril)
        # Rows p0..p0+i-1 were written by earlier iterations (as pair (i, k<i));
        # add the transposed partner and apply the symmetrization factor.
        dvvvv[p0:p0+i] += tmptril[:i]
        dvvvv[p0:p0+i] *= .25
        dvvvv[i*(i+1)//2+i] = tmptril[i] * .5
        # Stash the (j>i, i) rows; they are completed when the loop reaches j
        for j in range(i+1, nvir):
            dvvvv[j*(j+1)//2+i] = tmptril[j]
        p0 += i + 1
    gtmp = tmp = tmp1 = tmptril = gvovv = None
    dvvov = dovvv.transpose(2,3,0,1)
    return (dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)
def gamma2_outcore(mycc, t1, t2, l1, l2, h5fobj):
    """Build the 2-particle density-matrix intermediates block-wise on disk.

    The results are written into datasets/groups created in ``h5fobj``;
    scratch intermediates go through a temporary HDF5 swap file that is
    emptied and closed before returning.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``
            (the latter drives the block sizes).
        t1: array of shape (nocc, nvir).
        t2: array indexed as (nocc, nocc, nvir, nvir).
        l1: array with the same layout as ``t1``.
        l2: array with the same layout as ``t2``.
        h5fobj: writable h5py file/group; datasets 'dovov', 'dvvvv', 'doooo',
            'doovv', 'dovvo', 'dooov' and the group 'dovvv' are created in it.

    Returns:
        Tuple ``(dovov, dvvvv, doooo, doovv, dovvo, None, dovvv, dooov)`` of
        h5py objects from ``h5fobj``; the ``dvvov`` slot is ``None``.
    """
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    dovov = h5fobj.create_dataset('dovov', (nocc, nvir, nocc, nvir), 'f8')
    dvvvv = h5fobj.create_dataset('dvvvv', (nvir_pair, nvir_pair), 'f8')
    doooo = h5fobj.create_dataset('doooo', (nocc, nocc, nocc, nocc), 'f8')
    doovv = h5fobj.create_dataset('doovv', (nocc, nocc, nvir, nvir), 'f8')
    dovvo = h5fobj.create_dataset('dovvo', (nocc, nvir, nvir, nocc), 'f8')
    dooov = h5fobj.create_dataset('dooov', (nocc, nocc, nocc, nvir), 'f8')

    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Explicit 'w': recent h5py opens files read-only when no mode is given,
    # which would make the create_dataset calls below fail.
    fswap = h5py.File(_tmpfile.name, 'w')
    mOvOv = fswap.create_dataset('mOvOv', (nocc, nvir, nocc, nvir), 'f8')
    mOVov = fswap.create_dataset('mOVov', (nocc, nvir, nocc, nvir), 'f8')

    moo = numpy.empty((nocc, nocc))
    mvv = numpy.zeros((nvir, nvir))

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 * 5
    blksize = max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1(
        'rdm intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    # time.clock() was removed in Python 3.8; keep it where it still exists
    # and fall back to process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time1 = cpu_clock(), time.time()
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        #:theta = make_theta(t2[p0:p1])
        #:pOvOv = numpy.einsum('ikca,jkcb->jbia', l2, t2[p0:p1])
        #:pOVov = -numpy.einsum('ikca,jkbc->jbia', l2, t2[p0:p1])
        #:pOVov += numpy.einsum('ikac,jkbc->jbia', l2, theta)
        pOvOv = numpy.empty((nocc, p1 - p0, nvir, nvir))
        pOVov = numpy.empty((nocc, p1 - p0, nvir, nvir))
        t2a = numpy.empty((p1 - p0, nvir, nocc, nvir))
        t2b = numpy.empty((p1 - p0, nvir, nocc, nvir))
        theta = make_theta(t2[p0:p1])
        tmp = numpy.empty_like(t2a)
        for i in range(p1 - p0):
            t2a[i] = t2[p0 + i].transpose(2, 0, 1)
            t2b[i] = t2[p0 + i].transpose(1, 0, 2)
            tmp[i] = theta[i].transpose(1, 0, 2)
        t2a = t2a.reshape(-1, nov)
        t2b = t2b.reshape(-1, nov)
        theta, tmp = tmp.reshape(-1, nov), None
        for i in range(nocc):
            pOvOv[i] = lib.dot(t2a, l2[i].reshape(nov, -1)).reshape(
                -1, nvir, nvir)
            pOVov[i] = lib.dot(t2b, l2[i].reshape(nov, -1), -1).reshape(
                -1, nvir, nvir)
            pOVov[i] += lib.dot(
                theta,
                _cp(l2[i].transpose(0, 2, 1).reshape(nov, -1))).reshape(
                    -1, nvir, nvir)
        theta = t2a = t2b = None
        # Store both the occupied-leading layout (datasets) and the
        # virtual-leading layout (per-step group members read back in pass 3)
        mOvOv[p0:p1] = pOvOv.transpose(1, 2, 0, 3)
        mOVov[p0:p1] = pOVov.transpose(1, 2, 0, 3)
        fswap['mvOvO/%d' % istep] = pOvOv.transpose(3, 1, 2, 0)
        fswap['mvOVo/%d' % istep] = pOVov.transpose(3, 1, 2, 0)
        moo[p0:p1] = (numpy.einsum('ljdd->jl', pOvOv) * 2 +
                      numpy.einsum('ljdd->jl', pOVov))
        mvv += (numpy.einsum('llbd->bd', pOvOv[p0:p1]) * 2 +
                numpy.einsum('llbd->bd', pOVov[p0:p1]))
        pOvOv = pOVov = None
        time1 = log.timer_debug1(
            'rdm intermediates pass1 [%d:%d]' % (p0, p1), *time1)

    mia = (numpy.einsum('kc,ikac->ia', l1, t2) * 2 -
           numpy.einsum('kc,ikca->ia', l1, t2))
    mab = numpy.einsum('kc,kb->cb', l1, t1)
    mij = numpy.einsum('kc,jc->jk', l1, t1) + moo * .5

    gooov = numpy.einsum('ji,ka->jkia', moo * -.5, t1)

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc**3 + nocc**2 * nvir + nocc * nvir**2 * 6
    blksize = max(ccsd.BLKMIN, int(max_memory * .95e6 / 8 / unit))
    log.debug1(
        'rdm intermediates pass 2: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for p0, p1 in prange(0, nocc, blksize):
        tau = _ccsd.make_tau(t2[p0:p1], t1[p0:p1], t1)
        #:goooo = numpy.einsum('ijab,klab->klij', l2, tau)*.5
        goooo = lib.dot(tau.reshape(-1, nvir**2),
                        l2.reshape(-1, nvir**2).T, .5)
        goooo = goooo.reshape(-1, nocc, nocc, nocc)
        h5fobj['doooo'][p0:p1] = make_theta(goooo).transpose(0, 2, 1, 3)

        #:gooov[p0:p1] -= numpy.einsum('ib,jkba->jkia', l1, tau)
        #:gooov[p0:p1] -= numpy.einsum('jkba,ib->jkia', l2[p0:p1], t1)
        #:gooov[p0:p1] += numpy.einsum('jkil,la->jkia', goooo, t1*2)
        for i in range(p0, p1):
            gooov[i] -= lib.dot(
                _cp(tau[i - p0].transpose(0, 2, 1).reshape(-1, nvir)),
                l1.T).reshape(nocc, nvir, nocc).transpose(0, 2, 1)
            gooov[i] -= lib.dot(
                _cp(l2[i].transpose(0, 2, 1).reshape(-1, nvir)),
                t1.T).reshape(nocc, nvir, nocc).transpose(0, 2, 1)
        lib.dot(goooo.reshape(-1, nocc), t1, 2,
                gooov[p0:p1].reshape(-1, nvir), 1)

        #:goovv -= numpy.einsum('jk,ikab->ijab', mij, tau)
        goovv = numpy.einsum('ia,jb->ijab', mia[p0:p1], t1)
        for i in range(p1 - p0):
            lib.dot(mij, tau[i].reshape(nocc, -1), -1,
                    goovv[i].reshape(nocc, -1), 1)
            goovv[i] += .5 * l2[p0 + i]
            goovv[i] += .5 * tau[i]
        #:goovv -= numpy.einsum('cb,ijac->ijab', mab, t2[p0:p1])
        #:goovv -= numpy.einsum('bd,ijad->ijab', mvv*.5, tau)
        lib.dot(t2[p0:p1].reshape(-1, nvir), mab, -1,
                goovv.reshape(-1, nvir), 1)
        lib.dot(tau.reshape(-1, nvir), mvv.T, -.5,
                goovv.reshape(-1, nvir), 1)
        tau = None
        #==== mem usage nocc**3 + nocc*nvir**2

        pOvOv = _cp(mOvOv[p0:p1])
        pOVov = _cp(mOVov[p0:p1])
        #:gooov[p0:p1,:] += numpy.einsum('jaic,kc->jkia', pOvOv, t1)
        #:gooov[:,p0:p1] -= numpy.einsum('kaic,jc->jkia', pOVov, t1)
        tmp = lib.dot(pOvOv.reshape(-1, nvir), t1.T).reshape(
            p1 - p0, -1, nocc, nocc)
        gooov[p0:p1, :] += tmp.transpose(0, 3, 2, 1)
        # beta=0: overwrite the tmp buffer instead of allocating a new one
        lib.dot(t1, pOVov.reshape(-1, nvir).T, 1, tmp.reshape(nocc, -1), 0)
        gooov[:, p0:p1] -= tmp.reshape(
            nocc, p1 - p0, nvir, nocc).transpose(0, 1, 3, 2)

        #:tmp = numpy.einsum('ikac,jc->jika', l2, t1[p0:p1])
        #:gOvVo -= numpy.einsum('jika,kb->jabi', tmp, t1)
        #:gOvvO = numpy.einsum('jkia,kb->jabi', tmp, t1) + pOvOv.transpose(0,3,1,2)
        tmp = tmp.reshape(-1, nocc, nocc, nvir)
        lib.dot(t1[p0:p1], l2.reshape(-1, nvir).T, 1,
                tmp.reshape(p1 - p0, -1))
        gOvVo = numpy.einsum('ia,jb->jabi', l1, t1[p0:p1])
        gOvvO = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            gOvVo[i] -= lib.dot(
                _cp(tmp[i].transpose(0, 2, 1).reshape(-1, nocc)),
                t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
            gOvVo[i] += pOVov[i].transpose(2, 0, 1)
            gOvvO[i] = lib.dot(tmp[i].reshape(nocc, -1).T,
                               t1).reshape(nocc, nvir, -1).transpose(1, 2, 0)
            gOvvO[i] += pOvOv[i].transpose(2, 0, 1)
        tmp = None
        #==== mem usage nocc**3 + nocc*nvir**2*6
        # (the original note read "nocc*nvir**6"; presumably a typo -- the
        #  pass-2 `unit` estimate above uses nocc*nvir**2*6)
        dovvo[p0:p1] = (gOvVo * 2 + gOvvO).transpose(0, 2, 1, 3)
        gOvvO *= -2
        gOvvO -= gOvVo
        doovv[p0:p1] = gOvvO.transpose(0, 3, 1, 2)
        gOvvO = gOvVo = None

        for j0, j1 in prange(0, nocc, blksize):
            tau2 = _ccsd.make_tau(t2[j0:j1], t1[j0:j1], t1)
            #:goovv += numpy.einsum('ijkl,klab->ijab', goooo[:,:,j0:j1], tau2)
            lib.dot(goooo[:, :, j0:j1].copy().reshape((p1 - p0) * nocc, -1),
                    tau2.reshape(-1, nvir**2), 1,
                    goovv.reshape(-1, nvir**2), 1)

            tau2 += numpy.einsum('ia,jb->ijab', t1[j0:j1], t1)
            tau2 = _cp(tau2.transpose(0, 3, 1, 2).reshape(-1, nov))
            #:goovv[:,j0:j1] += numpy.einsum('ibld,jlda->ijab', pOvOv, tau2) * .5
            #:goovv[:,j0:j1] -= numpy.einsum('iald,jldb->ijab', pOVov, tau2) * .5
            goovv[:, j0:j1] += lib.dot(
                pOvOv.reshape(-1, nov), tau2.T, .5).reshape(
                    p1 - p0, nvir, -1, nvir).transpose(0, 2, 3, 1)
            goovv[:, j0:j1] += lib.dot(
                pOVov.reshape(-1, nov), tau2.T, -.5).reshape(
                    p1 - p0, nvir, -1, nvir).transpose(0, 2, 1, 3)
            tau2 = None
        #==== mem usage nocc**3 + nocc*nvir**2*7

        #:goovv += numpy.einsum('iald,jlbd->ijab', pOVov*2+pOvOv, t2) * .5
        pOVov *= 2
        pOVov += pOvOv
        for j in range(nocc):
            tmp = lib.dot(pOVov.reshape(-1, nov),
                          _cp(t2[j].transpose(0, 2, 1).reshape(-1, nvir)), .5)
            goovv[:, j] += tmp.reshape(-1, nvir, nvir)
            tmp = None
        dovov[p0:p1] = make_theta(goovv).transpose(0, 2, 1, 3)
        goooo = goovv = pOvOv = pOVov = None
        time1 = log.timer_debug1(
            'rdm intermediates pass2 [%d:%d]' % (p0, p1), *time1)

    h5fobj['dooov'][:] = (gooov.transpose(0, 2, 1, 3) * 2 -
                          gooov.transpose(1, 2, 0, 3))
    gooov = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nocc**2 * nvir * 2 + nocc * nvir**2 * 2,
               nvir**3 * 2 + nocc * nvir**2)
    blksize = min(nvir, max(ccsd.BLKMIN,
                            int(max_memory * .95e6 / 8 / unit)))
    iobuflen = int(256e6 / 8 / blksize)
    # Pass 3 blocks over the virtual index, so report nvir (the original
    # passed nocc under the "nvir" label).
    log.debug1(
        'rdm intermediates pass 3: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    h5fobj.create_group('dovvv')
    for istep, (p0, p1) in enumerate(prange(0, nvir, blksize)):
        pvOvO = numpy.empty((p1 - p0, nocc, nvir, nocc))
        pvOVo = numpy.empty((p1 - p0, nocc, nvir, nocc))
        ao2mo.outcore._load_from_h5g(fswap['mvOvO'], p0, p1, pvOvO)
        ao2mo.outcore._load_from_h5g(fswap['mvOVo'], p0, p1, pvOVo)
        #:gvovv -= numpy.einsum('aibk,kc->aibc', pvOvO, t1)
        #:gvovv += numpy.einsum('aick,kb->aibc', pvOVo, t1)
        gvovv = lib.dot(pvOVo.reshape(-1, nocc), t1).reshape(
            -1, nocc, nvir, nvir)
        for i in range(p1 - p0):
            gvovv[i] = gvovv[i].transpose(0, 2, 1)
        lib.dot(pvOvO.reshape(-1, nocc), t1, -1, gvovv.reshape(-1, nvir), 1)
        pvOvO = pvOVo = None
        #==== mem usage nocc**2*nvir*2 + nocc*nvir**2*2

        l2tmp = l2[:, :, p0:p1] * .5
        #:gvvvv = numpy.einsum('ijab,ijcd->abcd', l2tmp, t2)
        #:jabc = numpy.einsum('ijab,ic->jabc', l2tmp, t1)
        #:gvvvv += numpy.einsum('jabc,jd->abcd', jabc, t1)
        gvvvv = lib.dot(l2tmp.reshape(nocc**2, -1).T,
                        t2.reshape(nocc**2, -1))
        jabc = lib.dot(l2tmp.reshape(nocc, -1).T, t1)
        lib.dot(jabc.reshape(nocc, -1).T, t1, 1, gvvvv.reshape(-1, nvir), 1)
        gvvvv = gvvvv.reshape(-1, nvir, nvir, nvir)
        l2tmp = jabc = None

        #:gvovv = numpy.einsum('ja,jibc->aibc', l1[:,p0:p1], t2)
        #:gvovv += numpy.einsum('jibc,ja->aibc', l2, t1[:,p0:p1])
        lib.dot(l1[:, p0:p1].copy().T, t2.reshape(nocc, -1), 1,
                gvovv.reshape(p1 - p0, -1), 1)
        lib.dot(t1[:, p0:p1].copy().T, l2.reshape(nocc, -1), 1,
                gvovv.reshape(p1 - p0, -1), 1)

        tmp = numpy.einsum('ja,jb->ab', l1[:, p0:p1], t1)
        gvovv += numpy.einsum('ab,ic->aibc', tmp, t1)
        gvovv += numpy.einsum('ba,ic->aibc', mvv[:, p0:p1] * .5, t1)

        #:gvovv -= numpy.einsum('adbc,id->aibc', gvvvv, t1*2)
        for j in range(p1 - p0):
            lib.dot(t1, gvvvv[j].reshape(nvir, -1), -2,
                    gvovv[j].reshape(nocc, -1), 1)

        # symmetrize dvvvv because it is symmetrized in ccsd_grad and make_rdm2 anyway
        #:dvvvv = .5*(gvvvv+gvvvv.transpose(0,1,3,2))
        #:dvvvv = .5*(dvvvv+dvvvv.transpose(1,0,3,2))
        # now dvvvv == dvvvv.transpose(2,3,0,1) == dvvvv.transpose(0,1,3,2) == dvvvv.transpose(1,0,3,2)
        tmp = numpy.empty((nvir, nvir, nvir))
        tmp1 = numpy.empty((nvir, nvir, nvir))
        tmpvvvv = numpy.empty((p1 - p0, nvir, nvir_pair))
        for i in range(p1 - p0):
            make_theta(gvvvv[i:i + 1], out=tmp)
            tmp1[:] = tmp.transpose(1, 0, 2)
            _ccsd.precontract(tmp1, diag_fac=2, out=tmpvvvv[i])
        # tril of (dvvvv[p0:p1,p0:p1]+dvvvv[p0:p1,p0:p1].T)
        for i in range(p0, p1):
            for j in range(p0, i):
                tmpvvvv[i - p0, j] += tmpvvvv[j - p0, i]
            tmpvvvv[i - p0, i] *= 2
        for i in range(p0, p1):
            off = i * (i + 1) // 2
            if p0 > 0:
                # add the partner rows stashed on disk by earlier blocks
                tmpvvvv[i - p0, :p0] += dvvvv[off:off + p0]
            dvvvv[off:off + i + 1] = tmpvvvv[i - p0, :i + 1] * .25
        # stash rows belonging to later blocks; they are completed there
        for i in range(p1, nvir):
            off = i * (i + 1) // 2
            dvvvv[off + p0:off + p1] = tmpvvvv[:, i]
        tmp = tmp1 = tmpvvvv = None
        #==== mem usage nvir**3 + nocc*nvir**2

        gvvov = make_theta(gvovv).transpose(0, 2, 1, 3)
        ao2mo.outcore._transpose_to_h5g(h5fobj, 'dovvv/%d' % istep,
                                        gvvov.reshape(-1, nov), iobuflen)
        gvvvv = None
        gvovv = None
        time1 = log.timer_debug1(
            'rdm intermediates pass3 [%d:%d]' % (p0, p1), *time1)

    del (fswap['mOvOv'])
    del (fswap['mOVov'])
    del (fswap['mvOvO'])
    del (fswap['mvOVo'])
    fswap.close()
    _tmpfile = None
    return (h5fobj['dovov'], h5fobj['dvvvv'], h5fobj['doooo'],
            h5fobj['doovv'], h5fobj['dovvo'], None, h5fobj['dovvv'],
            h5fobj['dooov'])
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density-matrix intermediates with integrals, block-wise.

    Large quantities are streamed through a temporary HDF5 swap file; when
    ``d2`` is not supplied, the 2-particle intermediates are generated into a
    second temporary HDF5 file via ``ccsd_rdm.gamma2_outcore`` and removed
    again before returning.

    Args:
        mycc: object providing ``stdout``, ``verbose`` and ``max_memory``.
        t1: array of shape (nocc, nvir).
        t2, l1, l2: forwarded to the density-matrix builders when ``d1``/``d2``
            are not given.
        eris: integral container exposing ``oooo``, ``ooov``, ``oovv``,
            ``ovoo``, ``ovov``, ``ovvv`` and ``vvvv``; built with
            ``ccsd._ERIS(mycc)`` when ``None``.
        d1: optional 4-tuple ``(doo, dov, dvo, dvv)``.
        d2: optional 8-tuple
            ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov)``.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes (nocc, nocc), (nvir, nvir),
        (nvir, nocc) and (nvir, nocc). ``Ioo``, ``Ivv`` and ``Ivo`` are
        returned with their sign flipped, and the sign-flipped ``Ivo`` is
        added into ``Xvo``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        d1 = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    doo, dov, dvo, dvv = d1
    if d2 is None:
        _d2tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        fd2intermediate = h5py.File(_d2tmpfile.name, 'w')
        ccsd_rdm.gamma2_outcore(mycc, t1, t2, l1, l2, fd2intermediate)
        dovov = fd2intermediate['dovov']
        dvvvv = fd2intermediate['dvvvv']
        doooo = fd2intermediate['doooo']
        doovv = fd2intermediate['doovv']
        dovvo = fd2intermediate['dovvo']
        dovvv = fd2intermediate['dovvv']
        dooov = fd2intermediate['dooov']
    else:
        dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir
    nvir_pair = nvir * (nvir + 1) // 2
    _tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    fswap = h5py.File(_tmpfile.name, 'w')
    fswap.create_group('e_vvov')
    fswap.create_group('c_vvov')

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1), d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1, nvir).T,
                   d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4 -
            numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    eris_oooo = eris_ooov = d_oooo = None

    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    blksize = 8
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    d_ovov = lib.transpose_sum(d_ovov.reshape(nov, nov)).reshape(
        nocc, nvir, nocc, nvir)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1, nvir).T,
                   _cp(eris.ovoo).reshape(-1, nocc))
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))
    eris_ovov = None
    # Park the combined density on disk; it is re-read block-wise in pass 1
    fswap['dovvo'] = d_ovov.transpose(0, 1, 3, 2)
    d_ovov = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = max(nvir**3 * 2.5, nvir**3 * 2 + nocc * nvir**2)
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    iobuflen = int(256e6 / 8 / (blksize * nvir))
    log.debug1(
        'IX_intermediates pass 1: block size = %d, nocc = %d in %d blocks',
        blksize, nocc, int((nocc + blksize - 1) / blksize))
    for istep, (p0, p1) in enumerate(prange(0, nocc, blksize)):
        d_ooov = _cp(dooov[p0:p1])
        eris_oooo = _cp(eris.oooo[p0:p1])
        eris_ooov = _cp(eris.ooov[p0:p1])
        #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
        #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
        Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
        Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
        #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
        eris_oovv = _cp(eris.oovv[p0:p1])
        tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
        Ioo += lib.dot(
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)).T, tmp)
        Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
        eris_oooo = tmp = None

        d_ooov = d_ooov + dooov[:, p0:p1].transpose(1, 0, 2, 3)
        eris_ovov = _cp(eris.ovov[p0:p1])
        #:Ioo += numpy.einsum('ljka,lika->ij', d_ooov, eris_ooov)
        #:Xvo += numpy.einsum('jikb,jakb->ai', d_ooov, eris_ovov)
        for i in range(p1 - p0):
            lib.dot(eris_ooov[i].reshape(nocc, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Ioo, 1)
            lib.dot(eris_ovov[i].reshape(nvir, -1),
                    d_ooov[i].reshape(nocc, -1).T, 1, Xvo, 1)
        d_ooov = None

        #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
        #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
        #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
        d_oovv = _cp(doovv[p0:p1]) + doovv[:, p0:p1].transpose(1, 0, 3, 2)
        for i in range(p1 - p0):
            Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                           d_oovv[i].reshape(nocc, -1).T)
        Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
        Ivo += lib.dot(
            d_oovv.reshape(-1, nvir).T,
            _cp(eris_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc)))
        eris_ooov = None
        d_oovv = _ccsd.precontract(d_oovv.reshape(-1, nvir, nvir)).reshape(
            p1 - p0, nocc, -1)

        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        ao2mo.outcore._load_from_h5g(dovvv, p0 * nvir, p1 * nvir,
                                     d_ovvv.reshape(-1, nvir**2))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            Ivo += lib.dot(d_ovvv[i].reshape(nvir, -1),
                           eris_oovv[i].reshape(nocc, -1).T)
        eris_oovv = None

        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        c_ovvv = _ccsd.precontract(d_ovvv.reshape(-1, nvir, nvir))
        ao2mo.outcore._transpose_to_h5g(fswap, 'c_vvov/%d' % istep, c_ovvv,
                                        iobuflen)
        c_ovvv = c_ovvv.reshape(-1, nvir, nvir_pair)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        ao2mo.outcore._transpose_to_h5g(fswap, 'e_vvov/%d' % istep,
                                        eris_ovx.reshape(-1, nvir_pair),
                                        iobuflen)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)
        c_ovvv = d_oovv = None

        eris_ovvo = numpy.empty((p1 - p0, nvir, nvir, nocc))
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(d_ovvv[i])
            eris_ovvo[i] = eris_ovov[i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('abjc,ibjc->ai', d_ovvv, eris_ovov)
        Ivo += lib.dot(d_ovvv.reshape(-1, nvir).T,
                       eris_ovvo.reshape(-1, nocc))
        eris_ovvo = eris_ovov = None

        eris_ovvv = lib.unpack_tril(eris_ovx.reshape(-1, nvir_pair))
        eris_ovx = None
        eris_ovvv = eris_ovvv.reshape(p1 - p0, nvir, nvir, nvir)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        Ivv += lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvv.reshape(-1, nvir))
        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4 -
            numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))

        d_ovvo = _cp(fswap['dovvo'][p0:p1])
        #:Xvo += numpy.einsum('jbic,jbca->ai', d_ovov, eris_ovvv)
        lib.dot(eris_ovvv.reshape(-1, nvir).T, d_ovvo.reshape(-1, nocc), 1,
                Xvo, 1)

        d_ovvv = d_ovvo = eris_ovvv = None

    max_memory = mycc.max_memory - lib.current_memory()[0]
    unit = nocc * nvir**2 + nvir**3 * 2.5
    blksize = max(ccsd.BLKMIN, int(max_memory * 1e6 / 8 / unit))
    # Pass 2 blocks over the virtual index, so report nvir and the nvir-based
    # block count (the original logged the nocc figures here).
    log.debug1(
        'IX_intermediates pass 2: block size = %d, nvir = %d in %d blocks',
        blksize, nvir, int((nvir + blksize - 1) / blksize))
    for p0, p1 in prange(0, nvir, blksize):
        # offsets of rows p0 and p1 in lower-triangular packed storage
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        for i in range(p0, p1):
            # halve the diagonal (i,i) rows of the packed pair index
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = lib.unpack_tril(d_vvvv)
        eris_vvvv = lib.unpack_tril(_cp(eris.vvvv[off0:off1]))
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('dbic,dbca->ai', d_vvov, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1, nvir).T, d_vvvv.reshape(-1, nvir), 2,
                Ivv, 1)
        #:d_vvvv = _cp(d_vvvv + d_vvvv.transpose(0,1,3,2))
        d_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['c_vvov'], off0, off1,
                                     d_vvov.reshape(-1, nov))
        d_vvvo = _cp(d_vvov.transpose(0, 2, 1))
        lib.dot(eris_vvvv.reshape(-1, nvir).T, d_vvvo.reshape(-1, nocc), 1,
                Xvo, 1)
        d_vvov = eris_vvvv = None

        eris_vvov = numpy.empty((off1 - off0, nocc, nvir))
        ao2mo.outcore._load_from_h5g(fswap['e_vvov'], off0, off1,
                                     eris_vvov.reshape(-1, nov))
        eris_vvvo = _cp(eris_vvov.transpose(0, 2, 1))
        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        lib.dot(d_vvvv.reshape(-1, nvir).T, eris_vvvo.reshape(-1, nocc), 2,
                Ivo, 1)
        lib.dot(eris_vvvo.reshape(-1, nocc).T, d_vvvo.reshape(-1, nocc), 1,
                Ioo, 1)
        # Release every per-block buffer before the next iteration allocates
        # (the original cleared a non-existent "eris_vovv" and so kept
        # eris_vvvo / d_vvvo alive).
        eris_vvov = eris_vvvo = d_vvvo = d_vvvv = None

    del (fswap['e_vvov'])
    del (fswap['c_vvov'])
    del (fswap['dovvo'])
    fswap.close()
    _tmpfile = None

    if d2 is None:
        # Snapshot the keys first: entries are deleted inside the loop
        for key in list(fd2intermediate.keys()):
            del (fd2intermediate[key])
        fd2intermediate.close()
        _d2tmpfile = None

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo
def IX_intermediates(mycc, t1, t2, l1, l2, eris=None, d1=None, d2=None):
    """Contract density-matrix intermediates with ERIs into Ioo, Ivv, Ivo, Xvo.

    The lines starting with ``#:`` give the reference ``numpy.einsum``
    expression for the ``lib.dot`` call(s) that follow them.

    Args:
        mycc: CC object.  Its ``stdout`` and ``verbose`` attributes are read
            here; it is otherwise only forwarded to ``ccsd._ERIS`` and the
            ``ccsd_rdm`` helpers.
        t1: array whose ``shape`` is unpacked as ``(nocc, nvir)``.
        t2, l1, l2: only forwarded to the ``ccsd_rdm`` helpers when ``d1``
            or ``d2`` has to be computed.
        eris: object exposing ``oooo``, ``ooov``, ``oovv``, ``ovov``,
            ``ovoo``, ``ovvv`` and ``vvvv`` (Chemist's notation).  Built with
            ``ccsd._ERIS(mycc)`` when None.  ``ovvv`` and ``vvvv`` are
            treated as lower-triangular packed over a virtual index pair.
        d1: pair ``(doo, dvv)``.  Computed with
            ``ccsd_rdm.gamma1_intermediates`` when None.
        d2: 8-tuple ``(dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv,
            dooov)``.  Computed with ``ccsd_rdm.gamma2_incore`` when None.
            ``dvvov`` is unpacked but not used in this function.

    Returns:
        Tuple ``(Ioo, Ivv, Ivo, Xvo)`` with shapes ``(nocc, nocc)``,
        ``(nvir, nvir)``, ``(nvir, nocc)`` and ``(nvir, nocc)``.  ``Ioo``,
        ``Ivv`` and ``Ivo`` are multiplied by -1 before returning, and the
        negated ``Ivo`` is added into ``Xvo``.
    """
    if eris is None:
        # Note eris are in Chemist's notation
        eris = ccsd._ERIS(mycc)
    if d1 is None:
        doo, dvv = ccsd_rdm.gamma1_intermediates(mycc, t1, t2, l1, l2)
    else:
        doo, dvv = d1
    if d2 is None:
        d2 = ccsd_rdm.gamma2_incore(mycc, t1, t2, l1, l2)
    dovov, dvvvv, doooo, doovv, dovvo, dvvov, dovvv, dooov = d2

    # NOTE(review): `log` is not referenced again in this function.
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc * nvir

    # Note Ioo, Ivv are not hermitian
    Ioo = numpy.zeros((nocc, nocc))
    Ivv = numpy.zeros((nvir, nvir))
    Ivo = numpy.zeros((nvir, nocc))
    Xvo = numpy.zeros((nvir, nocc))

    # ---- oooo block -------------------------------------------------------
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    d_oooo = _cp(doooo)
    # Symmetrize over the first index pair.
    d_oooo = _cp(d_oooo + d_oooo.transpose(1, 0, 2, 3))
    #:Ioo += numpy.einsum('jmlk,imlk->ij', d_oooo, eris_oooo) * 2
    Ioo += lib.dot(eris_oooo.reshape(nocc, -1),
                   d_oooo.reshape(nocc, -1).T, 2)
    d_oooo = _cp(d_oooo.transpose(0, 2, 3, 1))
    #:Xvo += numpy.einsum('iljk,ljka->ai', d_oooo, eris_ooov) * 2
    Xvo += lib.dot(eris_ooov.reshape(-1, nvir).T,
                   d_oooo.reshape(nocc, -1).T, 2)
    Xvo += (numpy.einsum('kj,kjia->ai', doo, eris_ooov) * 4
            - numpy.einsum('kj,ikja->ai', doo + doo.T, eris_ooov))
    # Drop references so the copies can be freed before the next block.
    eris_oooo = eris_ooov = d_oooo = None

    # ---- ooov block -------------------------------------------------------
    d_ooov = _cp(dooov)
    eris_oooo = _cp(eris.oooo)
    eris_ooov = _cp(eris.ooov)
    #:Ivv += numpy.einsum('ijkb,ijka->ab', d_ooov, eris_ooov)
    #:Ivo += numpy.einsum('jlka,jlki->ai', d_ooov, eris_oooo)
    Ivv += lib.dot(eris_ooov.reshape(-1, nvir).T, d_ooov.reshape(-1, nvir))
    Ivo += lib.dot(d_ooov.reshape(-1, nvir).T, eris_oooo.reshape(-1, nocc))
    #:Ioo += numpy.einsum('klja,klia->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('kjib,kjba->ai', d_ooov, eris.oovv)
    eris_oovv = _cp(eris.oovv)
    tmp = _cp(d_ooov.transpose(0, 1, 3, 2).reshape(-1, nocc))
    tmpooov = _cp(eris_ooov.transpose(0, 1, 3, 2))
    Ioo += lib.dot(tmpooov.reshape(-1, nocc).T, tmp)
    Xvo += lib.dot(eris_oovv.reshape(-1, nvir).T, tmp)
    eris_oooo = tmp = None

    # Symmetrize over the first index pair.
    d_ooov = d_ooov + d_ooov.transpose(1, 0, 2, 3)
    eris_ovov = _cp(eris.ovov)
    #:Ioo += numpy.einsum('jlka,ilka->ij', d_ooov, eris_ooov)
    #:Xvo += numpy.einsum('ijkb,kbja->ai', d_ooov, eris.ovov)
    Ioo += lib.dot(eris_ooov.reshape(nocc, -1), d_ooov.reshape(nocc, -1).T)
    Xvo += lib.dot(eris_ovov.reshape(-1, nvir).T,
                   _cp(d_ooov.transpose(0, 2, 3, 1).reshape(nocc, -1)).T)
    d_ooov = None

    # ---- oovv block -------------------------------------------------------
    #:Ioo += numpy.einsum('kjba,kiba->ij', d_oovv, eris.oovv)
    #:Ivv += numpy.einsum('ijcb,ijca->ab', d_oovv, eris.oovv)
    #:Ivo += numpy.einsum('kjba,kjib->ai', d_oovv, eris.ooov)
    d_oovv = _cp(doovv + doovv.transpose(1, 0, 3, 2))
    for i in range(nocc):
        Ioo += lib.dot(eris_oovv[i].reshape(nocc, -1),
                       d_oovv[i].reshape(nocc, -1).T)
    Ivv += lib.dot(eris_oovv.reshape(-1, nvir).T, d_oovv.reshape(-1, nvir))
    Ivo += lib.dot(d_oovv.reshape(-1, nvir).T, tmpooov.reshape(-1, nocc))
    # From here on d_oovv holds the precontracted form, reshaped to
    # (nocc, nocc, -1); it is contracted against packed eris.ovvv below.
    d_oovv = _ccsd.precontract(
        d_oovv.reshape(-1, nvir, nvir)).reshape(nocc, nocc, -1)
    eris_ooov = tmpooov = None

    # ---- ovov block -------------------------------------------------------
    blksize = 4
    # d_ovov[i] = dovov[i] + dovvo[i].transpose(0,2,1), built in slabs of
    # `blksize` occupied indices.
    d_ovov = numpy.empty((nocc, nvir, nocc, nvir))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovov[p0:p1] = _cp(dovov[p0:p1])
        d_ovvo = _cp(dovvo[p0:p1])
        for i in range(p0, p1):
            d_ovov[i] += d_ovvo[i - p0].transpose(0, 2, 1)
    d_ovvo = None
    #:d_ovov = d_ovov + d_ovov.transpose(2,3,0,1)
    lib.transpose_sum(d_ovov.reshape(nov, nov), inplace=True)
    #:Ivo += numpy.einsum('jbka,jbki->ai', d_ovov, eris.ovoo)
    Ivo += lib.dot(d_ovov.reshape(-1, nvir).T,
                   _cp(eris.ovoo).reshape(-1, nocc))
    #:Ioo += numpy.einsum('jakb,iakb->ij', d_ovov, eris.ovov)
    #:Ivv += numpy.einsum('jcib,jcia->ab', d_ovov, eris.ovov)
    Ioo += lib.dot(eris_ovov.reshape(nocc, -1), d_ovov.reshape(nocc, -1).T)
    Ivv += lib.dot(eris_ovov.reshape(-1, nvir).T, d_ovov.reshape(-1, nvir))

    # ---- ovvv block, processed in slabs of `blksize` occupied indices ------
    nvir_pair = nvir * (nvir + 1) // 2
    bufe_ovvv = numpy.empty((blksize, nvir, nvir, nvir))
    # bufc_ovvv deliberately shares storage with bufe_ovvv.  Build it as a
    # view with numpy.ndarray(buffer=...) rather than assigning to
    # ndarray.data, which NumPy deprecates as unsafe; this also avoids a
    # throw-away allocation.  Because of the aliasing, c_ovvv must be fully
    # consumed in each iteration before eris_ovvv is unpacked into bufe_ovvv.
    bufc_ovvv = numpy.ndarray((blksize, nvir, nvir_pair), buffer=bufe_ovvv)
    # Packed-pair-major copy of the precontracted dovvv; consumed by the
    # vvvv pass below.
    c_vvvo = numpy.empty((nvir_pair, nvir, nocc))
    for p0, p1 in prange(0, nocc, blksize):
        d_ovvv = numpy.empty((p1 - p0, nvir, nvir, nvir))
        #:Ivo += numpy.einsum('jadc,jidc->ai', d_ovvv, eris_oovv)
        for i in range(p1 - p0):
            lib.dot(dovvv[p0 + i].reshape(nvir, -1),
                    eris_oovv[p0 + i].reshape(nocc, -1).T, 1, Ivo, 1)

        c_ovvv = bufc_ovvv[:p1 - p0]
        # tril part of (d_ovvv + d_ovvv.transpose(0,1,3,2))
        _ccsd.precontract(dovvv[p0:p1].reshape(-1, nvir, nvir), out=c_ovvv)
        # Tiled transpose of c_ovvv into c_vvvo[:, :, p0:p1].
        for i0, i1 in prange(0, nvir_pair, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE // (p1 - p0) + 1):
                c_vvvo[i0:i1, j0:j1, p0:p1] = \
                    c_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        eris_ovx = _cp(eris.ovvv[p0:p1])
        #:Xvo += numpy.einsum('jibc,jabc->ai', d_oovv, eris_ovvv)
        #:Ivv += numpy.einsum('ibdc,iadc->ab', d_ovvv, eris_ovvv)
        for i in range(p1 - p0):
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    d_oovv[p0 + i].reshape(nocc, -1).T, 1, Xvo, 1)
            lib.dot(eris_ovx[i].reshape(nvir, -1),
                    c_ovvv[i].reshape(nvir, -1).T, 1, Ivv, 1)

        # Overwrites the storage shared with c_ovvv (no longer needed in
        # this iteration) with the unpacked integrals.
        eris_ovvv = bufe_ovvv[:p1 - p0]
        _ccsd.unpack_tril(eris_ovx.reshape(-1, nvir_pair),
                          out=eris_ovvv.reshape(-1, nvir**2))
        eris_ovx = None
        #:Xvo += numpy.einsum('icjb,acjb->ai', d_ovov, eris_vvov)
        d_ovvo = _cp(d_ovov[p0:p1].transpose(0, 1, 3, 2))
        lib.dot(eris_ovvv.reshape(-1, nvir).T,
                d_ovvo.reshape(-1, nocc), 1, Xvo, 1)

        # Reuse the d_ovvo allocation as scratch space for the transposed
        # eris_ovov slab.
        e_ovvo, d_ovvo = d_ovvo, None
        for i in range(p1 - p0):
            d_ovvv[i] = _ccsd.sum021(dovvv[p0 + i])
            e_ovvo[i] = eris_ovov[p0 + i].transpose(0, 2, 1)
        #:Ivo += numpy.einsum('jcab,jcib->ai', d_ovvv, eris_ovov)
        #:Ivv += numpy.einsum('icdb,icda->ab', d_ovvv, eris_ovvv)
        lib.dot(d_ovvv.reshape(-1, nvir).T,
                e_ovvo[:p1 - p0].reshape(-1, nocc), 1, Ivo, 1)
        lib.dot(eris_ovvv.reshape(-1, nvir).T,
                d_ovvv.reshape(-1, nvir), 1, Ivv, 1)

        Xvo[:, p0:p1] += (
            numpy.einsum('cb,iacb->ai', dvv, eris_ovvv) * 4
            - numpy.einsum('cb,icba->ai', dvv + dvv.T, eris_ovvv))
    # Release the ovvv scratch space before the vvvv pass.  c_ovvv is a view
    # into the shared buffer, so it has to be dropped too or the buffer
    # stays alive.
    d_oovv = d_ovvv = c_ovvv = bufc_ovvv = bufe_ovvv = None
    eris_ovov = eris_ovvv = eris_oovv = e_ovvo = None

    # ---- vvvv block, processed in slabs of `blksize` virtual indices -------
    eris_ovvv = _cp(eris.ovvv)
    bufe_vvvo = numpy.empty((blksize * nvir, nvir, nocc))
    bufe_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    bufd_vvvv = numpy.empty((blksize * nvir, nvir, nvir))
    for p0, p1 in prange(0, nvir, blksize):
        # Packed lower-triangular row offsets for virtual indices p0..p1.
        off0 = p0 * (p0 + 1) // 2
        off1 = p1 * (p1 + 1) // 2
        d_vvvv = _cp(dvvvv[off0:off1]) * 4
        # Rows for diagonal pairs (i, i) get half the weight of the others.
        for i in range(p0, p1):
            d_vvvv[i * (i + 1) // 2 + i - off0] *= .5
        d_vvvv = _ccsd.unpack_tril(d_vvvv, out=bufd_vvvv[:off1 - off0])
        eris_vvvv = _ccsd.unpack_tril(eris.vvvv[off0:off1],
                                      out=bufe_vvvv[:off1 - off0])
        #:Ivv += numpy.einsum('decb,deca->ab', d_vvvv, eris_vvvv) * 2
        #:Xvo += numpy.einsum('icdb,acdb->ai', d_ovvv, eris_vvvv)
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvv.reshape(-1, nvir), 2, Ivv, 1)
        d_vvvo = _cp(c_vvvo[off0:off1])
        lib.dot(eris_vvvv.reshape(-1, nvir).T,
                d_vvvo.reshape(-1, nocc), 1, Xvo, 1)

        #:Ioo += numpy.einsum('abjc,abci->ij', d_vvov, eris_vvvo)
        #:Ivo += numpy.einsum('dbca,dbci->ai', d_vvvv, eris_vvvo) * 2
        eris_vvvo = bufe_vvvo[:off1 - off0]
        # Tiled transpose of the packed eris_ovvv columns off0:off1.
        for i0, i1 in prange(off0, off1, BLKSIZE):
            for j0, j1 in prange(0, nvir, BLKSIZE // nocc + 1):
                eris_vvvo[i0 - off0:i1 - off0, j0:j1, :] = \
                    eris_ovvv[:, j0:j1, i0:i1].transpose(2, 1, 0)
        lib.dot(eris_vvvo.reshape(-1, nocc).T,
                d_vvvo.reshape(-1, nocc), 1, Ioo, 1)
        lib.dot(d_vvvv.reshape(-1, nvir).T,
                eris_vvvo.reshape(-1, nocc), 2, Ivo, 1)

    Ioo *= -1
    Ivv *= -1
    Ivo *= -1
    Xvo += Ivo
    return Ioo, Ivv, Ivo, Xvo