Exemplo n.º 1
0
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)
        blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
        bufR = numpy.empty((blksize*nao**2))
        bufI = numpy.empty((blksize*nao**2))
        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        for k, pqkR, pqkI, p0, p1 \
                in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
            ki = kpti_idx[k]
            kj = kptj_idx[k]
            coulG = numpy.sqrt(vkcoulG[p0:p1])

# case 1: k_pq = (pi|iq)
#:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
#:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pqkR *= coulG
            pqkI *= coulG
            pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR)
            pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI)
            iLkR = numpy.empty((nao*(p1-p0),nao))
            iLkI = numpy.empty((nao*(p1-p0),nao))
            for i in range(nset):
                iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                                    dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI)
                zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                       pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                       1, vkR[i,ki], vkI[i,ki], 1)

# case 2: k_pq = (iq|pi)
#:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
#:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                iLkR = iLkR.reshape(nao,-1)
                iLkI = iLkI.reshape(nao,-1)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, iLkR, iLkI)
                    zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                           1, vkR[i,kj], vkI[i,kj], 1)
            pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None
Exemplo n.º 2
0
    def sr_loop(self,
                kpti_kptj=numpy.zeros((2, 3)),
                max_memory=2000,
                compact=True,
                blksize=None):
        '''Short range part'''
        kpti, kptj = kpti_kptj
        unpack = is_zero(kpti - kptj) and not compact
        is_real = is_zero(kpti_kptj)
        nao = self.cell.nao_nr()
        if blksize is None:
            if is_real:
                if unpack:
                    blksize = max_memory * 1e6 / 8 / (nao *
                                                      (nao + 1) // 2 + nao**2)
                else:
                    blksize = max_memory * 1e6 / 8 / (nao * (nao + 1))
            else:
                blksize = max_memory * 1e6 / 16 / (nao**2 * 2)
            blksize = max(16, min(int(blksize), self.blockdim))
            logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory,
                          blksize)

        if unpack:
            buf = numpy.empty((blksize, nao * (nao + 1) // 2))

        def load(Lpq, b0, b1, bufR, bufI):
            Lpq = numpy.asarray(Lpq[b0:b1])
            if is_real:
                if unpack:
                    LpqR = lib.unpack_tril(Lpq, out=bufR).reshape(-1, nao**2)
                else:
                    LpqR = Lpq
                LpqI = numpy.zeros_like(LpqR)
            else:
                shape = Lpq.shape
                if unpack:
                    tmp = numpy.ndarray(shape, buffer=buf)
                    tmp[:] = Lpq.real
                    LpqR = lib.unpack_tril(tmp, out=bufR).reshape(-1, nao**2)
                    tmp[:] = Lpq.imag
                    LpqI = lib.unpack_tril(tmp, lib.ANTIHERMI,
                                           out=bufI).reshape(-1, nao**2)
                else:
                    LpqR = numpy.ndarray(shape, buffer=bufR)
                    LpqR[:] = Lpq.real
                    LpqI = numpy.ndarray(shape, buffer=bufI)
                    LpqI[:] = Lpq.imag
            return LpqR, LpqI

        LpqR = LpqI = None
        with _load3c(self._cderi, 'j3c', kpti_kptj) as j3c:
            naux = j3c.shape[0]
            for b0, b1 in lib.prange(0, naux, blksize):
                LpqR, LpqI = load(j3c, b0, b1, LpqR, LpqI)
                yield LpqR, LpqI
Exemplo n.º 3
0
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        nkptj = len(kptj_idx)
        log.debug1('kpt = %s', kpt)
        log.debug2('kpti_idx = %s', kpti_idx)
        log.debug2('kptj_idx = %s', kptj_idx)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            kk_todo[kptj_idx,kpti_idx] = False

        max_memory1 = max_memory * (nkptj+1)/(nkptj+5)
        blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16)
        bufR = numpy.empty((blksize*nao**2))
        bufI = numpy.empty((blksize*nao**2))
        # Use DF object to mimic KRHF/KUHF object in function get_coulG
        mydf.exxdiv = exxdiv
        vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs)
        kptjs = kpts[kptj_idx]
        # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>)
        for k, pqkR, pqkI, p0, p1 \
                in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1):
            ki = kpti_idx[k]
            kj = kptj_idx[k]
            coulG = numpy.sqrt(vkcoulG[p0:p1])

# case 1: k_pq = (pi|iq)
#:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj())
#:vk += numpy.einsum('ijkl,jk->il', v4, dm)
            pqkR *= coulG
            pqkI *= coulG
            pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR)
            pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI)
            iLkR = numpy.empty((nao*(p1-p0),nao))
            iLkI = numpy.empty((nao*(p1-p0),nao))
            for i in range(nset):
                iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao),
                                    dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI)
                zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1),
                       pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T,
                       1, vkR[i,ki], vkI[i,ki], 1)

# case 2: k_pq = (iq|pi)
#:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj())
#:vk += numpy.einsum('ijkl,li->kj', v4, dm)
            if swap_2e and not is_zero(kpt):
                iLkR = iLkR.reshape(nao,-1)
                iLkI = iLkI.reshape(nao,-1)
                for i in range(nset):
                    iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, iLkR, iLkI)
                    zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                           iLkR.reshape(-1,nao), iLkI.reshape(-1,nao),
                           1, vkR[i,kj], vkI[i,kj], 1)
            pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None
Exemplo n.º 4
0
Arquivo: mdf.py Projeto: eronca/pyscf
    def sr_loop(self, kpti_kptj=numpy.zeros((2,3)), max_memory=2000,
                compact=True, blksize=None):
        '''Short range part'''
        kpti, kptj = kpti_kptj
        unpack = is_zero(kpti-kptj) and not compact
        is_real = is_zero(kpti_kptj)
        nao = self.cell.nao_nr()
        if blksize is None:
            if is_real:
                if unpack:
                    blksize = max_memory*1e6/8/(nao*(nao+1)//2+nao**2*2)
                else:
                    blksize = max_memory*1e6/8/(nao*(nao+1)*2)
            else:
                blksize = max_memory*1e6/16/(nao**2*3)
            blksize = max(16, min(int(blksize), self.blockdim))
            logger.debug2(self, 'max_memory %d MB, blksize %d', max_memory, blksize)

        if unpack:
            buf = numpy.empty((blksize,nao*(nao+1)//2))
        def load(Lpq, b0, b1, bufR, bufI):
            Lpq = numpy.asarray(Lpq[b0:b1])
            if is_real:
                if unpack:
                    LpqR = lib.unpack_tril(Lpq, out=bufR).reshape(-1,nao**2)
                else:
                    LpqR = Lpq
                LpqI = numpy.zeros_like(LpqR)
            else:
                shape = Lpq.shape
                if unpack:
                    tmp = numpy.ndarray(shape, buffer=buf)
                    tmp[:] = Lpq.real
                    LpqR = lib.unpack_tril(tmp, out=bufR).reshape(-1,nao**2)
                    tmp[:] = Lpq.imag
                    LpqI = lib.unpack_tril(tmp, lib.ANTIHERMI, out=bufI).reshape(-1,nao**2)
                else:
                    LpqR = numpy.ndarray(shape, buffer=bufR)
                    LpqR[:] = Lpq.real
                    LpqI = numpy.ndarray(shape, buffer=bufI)
                    LpqI[:] = Lpq.imag
            return LpqR, LpqI

        LpqR = LpqI = j3cR = j3cI = None
        with self.load_Lpq(kpti_kptj) as Lpq:
            naux = Lpq.shape[0]
            with self.load_j3c(kpti_kptj) as j3c:
                for b0, b1 in lib.prange(0, naux, blksize):
                    LpqR, LpqI = load(Lpq, b0, b1, LpqR, LpqI)
                    j3cR, j3cI = load(j3c, b0, b1, j3cR, j3cI)
                    yield LpqR, LpqI, j3cR, j3cI
Exemplo n.º 5
0
 def load_Lpq(self, kpti_kptj=numpy.zeros((2,3))):
     with h5py.File(self._cderi, 'r') as f:
         if self.approx_sr_level == 0:
             return _load3c(self._cderi, 'Lpq', kpti_kptj)
         else:
             kpti, kptj = kpti_kptj
             if is_zero(kpti-kptj):
                 return pyscf.df.addons.load(self._cderi, 'Lpq/0')
             else:
                 # See _fake_Lpq_kpts
                 return pyscf.df.addons.load(self._cderi, 'Lpq/1')
Exemplo n.º 6
0
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)),
               kpts_band=None, exxdiv=None):
    mydf = _sync_mydf(mydf)
    cell = mydf.cell
    mesh = mydf.mesh
    coords = cell.gen_uniform_grids(mesh)
    ngrids = coords.shape[0]

    if hasattr(dm_kpts, 'mo_coeff'):
        if dm_kpts.ndim == 3:  # KRHF
            mo_coeff = [dm_kpts.mo_coeff]
            mo_occ   = [dm_kpts.mo_occ  ]
        else:  # KUHF
            mo_coeff = dm_kpts.mo_coeff
            mo_occ   = dm_kpts.mo_occ
    elif hasattr(dm_kpts[0], 'mo_coeff'):
        mo_coeff = [dm.mo_coeff for dm in dm_kpts]
        mo_occ   = [dm.mo_occ   for dm in dm_kpts]
    else:
        mo_coeff = None

    kpts = numpy.asarray(kpts)
    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    weight = 1./nkpts * (cell.vol/ngrids)

    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band
    nband = len(kpts_band)

    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=dms.dtype)
    else:
        vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128)

    coords = mydf.grids.coords
    ao2_kpts = [numpy.asarray(ao.T, order='C')
                for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)]
    if input_band is None:
        ao1_kpts = ao2_kpts
    else:
        ao1_kpts = [numpy.asarray(ao.T, order='C')
                    for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)]

    mem_now = lib.current_memory()[0]
    max_memory = mydf.max_memory - mem_now
    blksize = int(min(nao, max(1, (max_memory-mem_now)*1e6/16/4/ngrids/nao)))
    lib.logger.debug1(mydf, 'max_memory %s  blksize %d', max_memory, blksize)
    ao1_dtype = numpy.result_type(*ao1_kpts)
    ao2_dtype = numpy.result_type(*ao2_kpts)
    vR_dm = numpy.empty((nset,nao,ngrids), dtype=vk_kpts.dtype)

    ao_dms_buf = [None] * nkpts
    tasks = [(k1,k2) for k2 in range(nkpts) for k1 in range(nband)]
    for k1, k2 in mpi.static_partition(tasks):
        ao1T = ao1_kpts[k1]
        ao2T = ao2_kpts[k2]
        kpt1 = kpts_band[k1]
        kpt2 = kpts[k2]
        if ao2T.size == 0 or ao1T.size == 0:
            continue

        # If we have an ewald exxdiv, we add the G=0 correction near the
        # end of the function to bypass any discretization errors
        # that arise from the FFT.
        mydf.exxdiv = exxdiv
        if exxdiv == 'ewald' or exxdiv is None:
            coulG = tools.get_coulG(cell, kpt2-kpt1, False, mydf, mesh)
        else:
            coulG = tools.get_coulG(cell, kpt2-kpt1, True, mydf, mesh)
        if is_zero(kpt1-kpt2):
            expmikr = numpy.array(1.)
        else:
            expmikr = numpy.exp(-1j * numpy.dot(coords, kpt2-kpt1))

        if ao_dms_buf[k2] is None:
            if mo_coeff is None:
                ao_dms = [lib.dot(dm[k2], ao2T.conj()) for dm in dms]
            else:
                ao_dms = []
                for i, dm in enumerate(dms):
                    occ = mo_occ[i][k2]
                    mo_scaled = mo_coeff[i][k2][:,occ>0] * numpy.sqrt(occ[occ>0])
                    ao_dms.append(lib.dot(mo_scaled.T, ao2T).conj())
            ao_dms_buf[k2] = ao_dms
        else:
            ao_dms = ao_dms_buf[k2]

        if mo_coeff is None:
            for p0, p1 in lib.prange(0, nao, blksize):
                rho1 = numpy.einsum('ig,jg->ijg', ao1T[p0:p1].conj()*expmikr, ao2T)
                vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                rho1 = None
                vG *= coulG
                vR = tools.ifft(vG, mesh).reshape(p1-p0,nao,ngrids)
                vG = None
                if vR_dm.dtype == numpy.double:
                    vR = vR.real
                for i in range(nset):
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                vR = None
        else:
            for p0, p1 in lib.prange(0, nao, blksize):
                for i in range(nset):
                    rho1 = numpy.einsum('ig,jg->ijg',
                                        ao1T[p0:p1].conj()*expmikr,
                                        ao_dms[i].conj())
                    vG = tools.fft(rho1.reshape(-1,ngrids), mesh)
                    rho1 = None
                    vG *= coulG
                    vR = tools.ifft(vG, mesh).reshape(p1-p0,-1,ngrids)
                    vG = None
                    if vR_dm.dtype == numpy.double:
                        vR = vR.real
                    numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1])
                    vR = None
        vR_dm *= expmikr.conj()

        for i in range(nset):
            vk_kpts[i,k1] += weight * lib.dot(vR_dm[i], ao1T.T)

    vk_kpts = mpi.reduce(lib.asarray(vk_kpts))
    if gamma_point(kpts_band) and gamma_point(kpts):
        vk_kpts = vk_kpts.real

    if rank == 0:
        if exxdiv == 'ewald':
            _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band)
        return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
Exemplo n.º 7
0
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:],
                                    1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1,
                               j3cR[k][naux:], j3cI[k][naux:], 1)

            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1],
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v
Exemplo n.º 8
0
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell,
                   fused_cell,
                   mydf._cderi,
                   'cint3c2e_sph',
                   kptij_lst=kptij_lst,
                   dataname='j3c',
                   max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []

    # An alternative method to evalute j2c. This method might have larger numerical error?
    #    chgcell = make_modchg_basis(auxcell, mydf.eta)
    #    for k, kpt in enumerate(uniq_kpts):
    #        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
    #        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
    #        LkR = aoaux.real * coulG
    #        LkI = aoaux.imag * coulG
    #        j2caux = numpy.zeros_like(j2c[k])
    #        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
    #        if is_zero(kpt):  # kpti == kptj
    #            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
    #            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #            vbar = fuse(mydf.auxbar(fused_cell))
    #            s = (vbar != 0).astype(numpy.double)
    #            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
    #            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
    #        else:
    #            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
    #            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #
    #        try:
    #            j2c[k] = scipy.linalg.cholesky(j2c[k], lower=True)
    #        except scipy.linalg.LinAlgError as e:
    #            msg =('===================================\n'
    #                  'J-metric not positive definite.\n'
    #                  'It is likely that gs is not enough.\n'
    #                  '===================================')
    #            log.error(msg)
    #            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
    #        kLR = LkR.T
    #        kLI = LkI.T
    #        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR)
    #        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI)
    #        kLR *= coulG.reshape(-1,1)
    #        kLI *= coulG.reshape(-1,1)
    #        kLRs.append(kLR)
    #        kLIs.append(kLI)
    #        aoaux = LkR = LkI = kLR = kLI = coulG = None

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()

        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            j2c[k] = ('CD', scipy.linalg.cholesky(j2c[k], lower=True))
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            j2c[k] = ('eig', v)
        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:])
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    nauxs = [v[1].shape[0] for v in j2c]
    feri = h5py.File(mydf._cderi)

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:],
                                    1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1,
                               j3cR[k][naux:], j3cI[k][naux:], 1)

            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1],
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
Exemplo n.º 9
0
def build_Lpq_pbc(mydf, auxcell, kptij_lst):
    '''Fitting coefficients for auxiliary functions'''
    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpts_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpts_ji)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))
    if mydf.metric.upper() == 'S':
        outcore.aux_e2(mydf.cell, auxcell, mydf._cderi, 'cint3c1e_sph',
                       kptij_lst=kptij_lst, dataname='Lpq',
                       max_memory=max_memory)
        s_aux = auxcell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=uniq_kpts)
    elif mydf.metric.upper() == 'T':
        outcore.aux_e2(mydf.cell, auxcell, mydf._cderi, 'cint3c1e_p2_sph',
                       kptij_lst=kptij_lst, dataname='Lpq', max_memory=max_memory)
        s_aux = [x*2 for x in auxcell.pbc_intor('cint1e_kin_sph', hermi=1, kpts=uniq_kpts)]
    elif mydf.metric.upper() == 'J':
        fused_cell, fuse = fuse_auxcell(mydf, auxcell)
        outcore.aux_e2(mydf.cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                       kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
        vbar = fuse(mydf.auxbar(fused_cell))
        with h5py.File(mydf._cderi) as f:
            f['Lpq-kptij'] = kptij_lst
            for k_uniq, kpt_uniq in enumerate(uniq_kpts):
                adapted_ji_idx = numpy.where(uniq_inverse == k_uniq)[0]
                adapted_kptjs = kptjs[adapted_ji_idx]
                if is_zero(kpt_uniq):
                    ovlp = mydf.cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
                    for k, ji in enumerate(adapted_ji_idx):
                        ovlp[k] = lib.pack_tril(ovlp[k])
                for k, idx in enumerate(adapted_ji_idx):
                    v = fuse(numpy.asarray(f['j3c/%d'%idx]))
                    if is_zero(kpt_uniq):
                        for i, c in enumerate(vbar):
                            if c != 0:
                                v[i] -= c * ovlp[k]
                    f['Lpq/%d'%idx] = v
        v = ovlp = vbar = None

        j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
        for k, kpt in enumerate(uniq_kpts):
            j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        s_aux = j2c

#    else: # T+S
#        outcore.aux_e2(mydf.cell, auxcell, mydf._cderi, 'cint3c1e_sph',
#                       kptij_lst=kptij_lst, dataname='Lpq_s',
#                       max_memory=max_memory)
#        outcore.aux_e2(mydf.cell, auxcell, mydf._cderi, 'cint3c1e_p2_sph',
#                       kptij_lst=kptij_lst, dataname='Lpq',
#                       max_memory=max_memory)
#        with h5py.File(mydf._cderi) as f:
#            for k in range(len(kptij_lst)):
#                f['Lpq/%d'%k][:] = f['Lpq/%d'%k].value + f['Lpq_s/%d'%k].value
#                del(f['Lpq_s/%d'%k])
#        s_aux = auxcell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=uniq_kpts)
#        s_aux = [x+y*2 for x,y in zip(s_aux, auxcell.pbc_intor('cint1e_kin_sph', hermi=1, kpts=uniq_kpts))]

    try:
        s_aux = [scipy.linalg.cho_factor(x) for x in s_aux]
    except scipy.linalg.LinAlgError:
        eigs = [scipy.linalg.eigh(x)[0] for x in s_aux]
        conds = [x[-1]/max(1e-16, x[0]) for x in eigs]
        n = eigs[0].size
        shift = [0] * len(s_aux)
        for i, x in enumerate(s_aux):
            if conds[i] > 1e15:
                shift[i] = max(abs(eigs[i][0])*2, eigs[i][-1]*1e-18)
                x += numpy.eye(n) * shift[i]
        logger.warn(mydf, 'Ill condition number %s found in metric %s.\n'
                    'Level shift %s is applied.',
                    conds, mydf.metric, shift)
        s_aux = [scipy.linalg.cho_factor(x) for x in s_aux]

    max_memory = mydf.max_memory - lib.current_memory()[0]
    naux = auxcell.nao_nr()
    blksize = max(int(max_memory*.5*1e6/16/naux/mydf.blockdim), 1) * mydf.blockdim
    with h5py.File(mydf._cderi) as feri:
        for k, where in enumerate(uniq_inverse):
            s_k = s_aux[where]
            key = 'Lpq/%d' % k
            Lpq = feri[key]
            nao_pair = Lpq.shape[1]
            for p0, p1 in lib.prange(0, nao_pair, blksize):
                Lpq[:,p0:p1] = scipy.linalg.cho_solve(s_k, Lpq[:,p0:p1])
Exemplo n.º 10
0
    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                              (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d  blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1, None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot(
                    j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)
Exemplo n.º 11
0
    def ft_loop(self,
                gs=None,
                q=numpy.zeros(3),
                kpts=None,
                shls_slice=None,
                max_memory=4000,
                aosym='s1'):
        '''
        Fourier transform iterator for all kpti which satisfy  2pi*N = (kpts - kpti - q)*a
        N = -1, 0, 1
        '''
        cell = self.cell
        if gs is None:
            gs = self.gs
        if kpts is None:
            assert (is_zero(q))
            kpts = self.kpts
        kpts = numpy.asarray(kpts)
        nkpts = len(kpts)

        ao_loc = cell.ao_loc_nr()
        b = cell.reciprocal_vectors()
        Gv, Gvbase, kws = cell.get_Gv_weights(gs)
        gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
        ngs = gxyz.shape[0]

        if shls_slice is None:
            shls_slice = (0, cell.nbas, 0, cell.nbas)
        if aosym == 's2':
            assert (shls_slice[2] == 0)
            i0 = ao_loc[shls_slice[0]]
            i1 = ao_loc[shls_slice[1]]
            nij = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        else:
            ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
            nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
            nij = ni * nj
        blksize = max(16, int(max_memory * .9e6 / (nij * (nkpts + 1) * 16)))
        blksize = min(blksize, ngs, 16384)
        buf = [numpy.zeros(nij * blksize, dtype='D') for k in range(nkpts)]
        pqkRbuf = numpy.empty(nij * blksize)
        pqkIbuf = numpy.empty(nij * blksize)

        if aosym == 's2':
            for p0, p1 in self.prange(0, ngs, blksize):
                ft_ao._ft_aopair_kpts(cell,
                                      Gv[p0:p1],
                                      shls_slice,
                                      aosym,
                                      b,
                                      gxyz[p0:p1],
                                      Gvbase,
                                      q,
                                      kpts,
                                      out=buf)
                nG = p1 - p0
                for k in range(nkpts):
                    aoao = numpy.ndarray((nG, nij),
                                         dtype=numpy.complex128,
                                         order='F',
                                         buffer=buf[k])
                    pqkR = numpy.ndarray((nij, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((nij, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T
                    yield (k, pqkR, pqkI, p0, p1)
                    aoao[:] = 0  # == buf[k][:] = 0
        else:
            for p0, p1 in self.prange(0, ngs, blksize):
                ft_ao._ft_aopair_kpts(cell,
                                      Gv[p0:p1],
                                      shls_slice,
                                      aosym,
                                      b,
                                      gxyz[p0:p1],
                                      Gvbase,
                                      q,
                                      kpts,
                                      out=buf)
                nG = p1 - p0
                for k in range(nkpts):
                    aoao = numpy.ndarray((nG, ni, nj),
                                         dtype=numpy.complex128,
                                         order='F',
                                         buffer=buf[k])
                    pqkR = numpy.ndarray((ni, nj, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ni, nj, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.transpose(1, 2, 0)
                    pqkI[:] = aoao.imag.transpose(1, 2, 0)
                    yield (k, pqkR.reshape(-1, nG), pqkI.reshape(-1,
                                                                 nG), p0, p1)
                    aoao[:] = 0  # == buf[k][:] = 0
Exemplo n.º 12
0
Arquivo: df.py Projeto: eronca/pyscf
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)

                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True, overwrite_b=True)
                feri['j3c/%d'%ji][:naux,col0:col1] = v
Exemplo n.º 13
0
Arquivo: df.py Projeto: eronca/pyscf
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
#    chgcell = make_modchg_basis(auxcell, mydf.eta)
#    for k, kpt in enumerate(uniq_kpts):
#        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
#        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
#        LkR = aoaux.real * coulG
#        LkI = aoaux.imag * coulG
#        j2caux = numpy.zeros_like(j2c[k])
#        j2caux[naux:,naux:] = j2c[naux:,naux:]
#        if is_zero(kpt):  # kpti == kptj
#            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
#            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#            vbar = fuse(mydf.auxbar(fused_cell))
#            s = (vbar != 0).astype(numpy.double)
#            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
#            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
#        else:
#            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
#            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#        #j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
#        try:
#            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
#        except scipy.linalg.LinAlgError as e:
#            msg =('===================================\n'
#                  'J-metric not positive definite.\n'
#                  'It is likely that gs is not enough.\n'
#                  '===================================')
#            log.error(msg)
#            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
#        kLR = LkR.T
#        kLI = LkI.T
#        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR)
#        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI)
#        kLR *= coulG.reshape(-1,1)
#        kLI *= coulG.reshape(-1,1)
#        kLRs.append(kLR)
#        kLIs.append(kLI)
#        aoaux = LkR = LkI = kLR = kLI = coulG = None
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
        #j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        try:
            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
        except scipy.linalg.LinAlgError as e:
            msg =('===================================\n'
                  'J-metric not positive definite.\n'
                  'It is likely that gs is not enough.\n'
                  '===================================')
            log.error(msg)
            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:])
        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    feri = h5py.File(mydf._cderi)

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)

                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True, overwrite_b=True)
                feri['j3c/%d'%ji][:naux,col0:col1] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    for k, kptij in enumerate(kptij_lst):
        v = feri['j3c/%d'%k][:naux]
        del(feri['j3c/%d'%k])
        feri['j3c/%d'%k] = v

    feri.close()
Exemplo n.º 14
0
Arquivo: mdf.py Projeto: armunoz/pyscf
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T)

        j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c[k] -= lib.dot(kLR.T, kLR)
            j2c[k] -= lib.dot(kLI.T, kLI)
        else:
             # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c[k] -= j2cR + j2cI * 1j

        w, v = scipy.linalg.eigh(j2c[k])
        log.debug('MDF metric for kpt %s cond = %.4g, drop %d bfns',
                  k, w[0]/w[-1], numpy.count_nonzero(w<df.LINEAR_DEP_THR))
        v = v[:,w>df.LINEAR_DEP_THR].T.conj()
        v /= numpy.sqrt(w[w>df.LINEAR_DEP_THR]).reshape(-1,1)
        j2c[k] = ('eig', v)

        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None

    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)
    nauxs = [v[1].shape[0] for v in j2c]
    feri = h5py.File(mydf._cderi)

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri['j3c/%d'%idx][:,col0:col1]))
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k], j3cI[k], 1)

            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = j3cR[k]
                else:
                    v = j3cR[k] + j3cI[k] * 1j
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1], v,
                                                      lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d'%ji][:naux0,col0:col1] = v

        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d'%ji][:naux0]
            del(feri['j3c/%d'%ji])
            feri['j3c/%d'%ji] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
Exemplo n.º 15
0
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    if mydf.metric.upper() != 'J':
        outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                       kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        aoaux = fuse(aoaux)
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T)

        j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        if is_zero(kpt):  # kpti == kptj
            j2c[k] -= lib.dot(kLR.T, kLR)
            j2c[k] -= lib.dot(kLI.T, kLI)
        else:
             # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
            j2cR, j2cI = zdotCN(kLR.T, kLI.T, kLR, kLI)
            j2c[k] -= j2cR + j2cI * 1j

        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None

    feri = h5py.File(mydf._cderi)
    log.debug2('memory = %s', lib.current_memory()[0])

    # Expand approx Lpq for aosym='s1'.  The approx Lpq are all in aosym='s2' mode
    if mydf.approx_sr_level > 0 and len(kptij_lst) > 1:
        Lpq_fake = _fake_Lpq_kpts(mydf, feri, naux, nao)

    def save(label, dat, col0, col1):
        nrow = dat.shape[0]
        feri[label][:nrow,col0:col1] = dat

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = pyscf.df.outcore._guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = fuse(numpy.asarray(feri['j3c/%d'%idx][:,col0:col1]))

                if mydf.approx_sr_level == 0:
                    Lpq = numpy.asarray(feri['Lpq/%d'%idx][:,col0:col1])
                elif aosym == 's2':
                    Lpq = numpy.asarray(feri['Lpq/0'][:,col0:col1])
                else:
                    Lpq = numpy.asarray(Lpq_fake[:,col0:col1])
                lib.dot(j2c[uniq_kptji_id], Lpq, -.5, v, 1)
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]

                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = Lpq = None
            log.debug3('  istep, k = %d %d  memory = %s',
                       istep, k, lib.current_memory()[0])

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)
                    log.debug3('  p0:p1 = %d:%d  memory = %s',
                               p0, p1, lib.current_memory()[0])
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k], j3cI[k], 1)
                    log.debug3('  p0:p1 = %d:%d  memory = %s',
                               p0, p1, lib.current_memory()[0])

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    save('j3c/%d'%ji, j3cR[k], col0, col1)
                else:
                    save('j3c/%d'%ji, j3cR[k]+j3cI[k]*1j, col0, col1)


    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
Exemplo n.º 16
0
    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(feri[key])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                            (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('    blksize (%d,%d)', Gblksize, ncol)

        shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
Exemplo n.º 17
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    nauxs = []
    t1 = log.timer_debug1('2c2e', *t1)

    if h5py.is_hdf5(cderi_file):
        feri = h5py.File(cderi_file)
    else:
        feri = h5py.File(cderi_file, 'w')
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T)
        aoaux = None

        kLR1 = numpy.asarray(kLR[:, naux:], order='C')
        kLI1 = numpy.asarray(kLI[:, naux:], order='C')
        if is_zero(kpt):  # kpti == kptj
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR = lib.ddot(kLR1[p0:p1].T, kLR[p0:p1])
                j2cR = lib.ddot(kLI1[p0:p1].T, kLI[p0:p1], 1, j2cR, 1)
                j2c[k][naux:] -= mpi.allreduce(j2cR)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR, j2cI = zdotCN(kLR1[p0:p1].T, kLI1[p0:p1].T, kLR[p0:p1],
                                    kLI[p0:p1])
                j2cR = mpi.allreduce(j2cR)
                j2cI = mpi.allreduce(j2cI)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            feri['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
            nauxs.append(naux)
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug2('metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            feri['j2c/%d' % k] = v
            j2ctags.append('eig')
            nauxs.append(v.shape[0])
        kLR = kLI = kLR1 = kLI1 = coulG = None
    j2c = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    vbar = mydf.auxbar(fused_cell)
    vbar = fuse(vbar)
    ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=kptjs[aosym_s2])
    ovlp = [lib.pack_tril(s) for s in ovlp]
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(
        int(
            min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                nao / 3 / mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s', max_memory,
               mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e_sph')

    def gen_int3c(auxcell, job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        if dataname in feri:
            del (feri[dataname])

        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        dij = (i1 - i0) * nao
        if j_only:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                feri.create_dataset(key, shape, 'f8')
            else:
                feri.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e2 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            if j_only:
                mat = numpy.ndarray((nkptij, dii, nrow),
                                    dtype=dtype,
                                    buffer=buf)
            else:
                mat = numpy.ndarray((nkptij, dij, nrow),
                                    dtype=dtype,
                                    buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = feri['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0 * (i0 + 1) // 2:i1 *
                               (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii),
                                        dtype=v.dtype,
                                        buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(feri[key])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                            (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('    blksize (%d,%d)', Gblksize, ncol)

        shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v

    t2 = t1
    j3c_workers = numpy.zeros(len(j3c_jobs), dtype=int)
    #for job_id, ish0, ish1 in mpi.work_share_partition(j3c_jobs):
    for job_id, ish0, ish1 in mpi.work_stealing_partition(j3c_jobs):
        gen_int3c(fused_cell, job_id, ish0, ish1)
        t2 = log.alltimer_debug2('int j3c %d' % job_id, *t2)

        for k, kpt in enumerate(uniq_kpts):
            ft_fuse(job_id, k, ish0, ish1)
            t2 = log.alltimer_debug2('ft-fuse %d k %d' % (job_id, k), *t2)

        j3c_workers[job_id] = rank
    j3c_workers = mpi.allreduce(j3c_workers)
    log.debug2('j3c_workers %s', j3c_workers)
    j2c = kLRs = kLIs = ovlp = vbar = fuse = gen_int3c = ft_fuse = None
    t1 = log.timer_debug1('int3c and fuse', *t1)

    def get_segs_loc(aosym):
        off0 = numpy.asarray([ao_loc[i0] for x, i0, i1 in j3c_jobs])
        off1 = numpy.asarray([ao_loc[i1] for x, i0, i1 in j3c_jobs])
        if aosym:  # s2
            dims = off1 * (off1 + 1) // 2 - off0 * (off0 + 1) // 2
        else:
            dims = (off1 - off0) * nao
        #dims = numpy.asarray([ao_loc[i1]-ao_loc[i0] for x,i0,i1 in j3c_jobs])
        dims = numpy.hstack(
            [dims[j3c_workers == w] for w in range(mpi.pool.size)])
        job_idx = numpy.hstack(
            [numpy.where(j3c_workers == w)[0] for w in range(mpi.pool.size)])
        segs_loc = numpy.append(0, numpy.cumsum(dims))
        segs_loc = [(segs_loc[j], segs_loc[j + 1])
                    for j in numpy.argsort(job_idx)]
        return segs_loc

    segs_loc_s1 = get_segs_loc(False)
    segs_loc_s2 = get_segs_loc(True)

    if 'j3c' in feri: del (feri['j3c'])
    segsize = (max(nauxs) + mpi.pool.size - 1) // mpi.pool.size
    naux0 = rank * segsize
    for k, kptij in enumerate(kptij_lst):
        naux1 = min(nauxs[uniq_inverse[k]], naux0 + segsize)
        nrow = max(0, naux1 - naux0)
        if gamma_point(kptij):
            dtype = 'f8'
        else:
            dtype = 'c16'
        if aosym_s2[k]:
            nao_pair = nao * (nao + 1) // 2
        else:
            nao_pair = nao * nao
        feri.create_dataset('j3c/%d' % k, (nrow, nao_pair),
                            dtype,
                            maxshape=(None, nao_pair))

    def load(k, p0, p1):
        naux1 = nauxs[uniq_inverse[k]]
        slices = [(min(i * segsize + p0, naux1), min(i * segsize + p1, naux1))
                  for i in range(mpi.pool.size)]
        segs = []
        for p0, p1 in slices:
            val = []
            for job_id, worker in enumerate(j3c_workers):
                if rank == worker:
                    key = 'j3c-chunks/%d/%d' % (job_id, k)
                    val.append(feri[key][p0:p1].ravel())
            if val:
                segs.append(numpy.hstack(val))
            else:
                segs.append(numpy.zeros(0))
        return segs

    def save(k, p0, p1, segs):
        segs = mpi.alltoall(segs)
        naux1 = nauxs[uniq_inverse[k]]
        loc0, loc1 = min(p0, naux1 - naux0), min(p1, naux1 - naux0)
        nL = loc1 - loc0
        if nL > 0:
            if aosym_s2[k]:
                segs = numpy.hstack([
                    segs[i0 * nL:i1 * nL].reshape(nL, -1)
                    for i0, i1 in segs_loc_s2
                ])
            else:
                segs = numpy.hstack([
                    segs[i0 * nL:i1 * nL].reshape(nL, -1)
                    for i0, i1 in segs_loc_s1
                ])
            feri['j3c/%d' % k][loc0:loc1] = segs

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, min(8000, mydf.max_memory - mem_now))
    if numpy.all(aosym_s2):
        if gamma_point(kptij_lst):
            blksize = max(16, int(max_memory * .5e6 / 8 / nao**2))
        else:
            blksize = max(16, int(max_memory * .5e6 / 16 / nao**2))
    else:
        blksize = max(16, int(max_memory * .5e6 / 16 / nao**2 / 2))
    log.debug1('max_momory %d MB (%d in use), blksize %d', max_memory, mem_now,
               blksize)

    t2 = t1
    with lib.call_in_background(save) as async_write:
        for k, kptji in enumerate(kptij_lst):
            for p0, p1 in lib.prange(0, segsize, blksize):
                segs = load(k, p0, p1)
                async_write(k, p0, p1, segs)
                t2 = log.timer_debug1(
                    'assemble k=%d %d:%d (in %d)' % (k, p0, p1, segsize), *t2)

    if 'j3c-chunks' in feri: del (feri['j3c-chunks'])
    if 'j3c-kptij' in feri: del (feri['j3c-kptij'])
    feri['j3c-kptij'] = kptij_lst
    t1 = log.alltimer_debug1('assembling j3c', *t1)
    feri.close()
Exemplo n.º 18
0
    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = lib.transpose(numpy.asarray(Gaux.real, order='C'))
        kLI = lib.transpose(numpy.asarray(Gaux.imag, order='C'))
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = fuse(numpy.asarray(fswap[key]))
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol*Gblksize)
        pqkIbuf = numpy.empty(ncol*Gblksize)
        buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128)
        log.alldebug2('    blksize (%d,%d)', Gblksize, ncol)

        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                                        gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)
Exemplo n.º 19
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c_k[naux:] += lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c_k[naux:] += lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c_k[naux:] += j2cR + j2cI * 1j
            kLR = kLI = None

        j2c_k[:naux, naux:] = j2c_k[naux:, :naux].conj().T
        j2c[k] -= mpi.allreduce(j2c_k)
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            fswap['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            fswap['j2c/%d' % k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d' % k] = (v[:, idx] /
                                            numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = None
            j2ctags.append('eig')
    j2c = coulG = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(
        int(
            min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                nao / 3 / mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s', max_memory,
               mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e')

    def gen_int3c(job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        if j_only:
            dij = dii
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            dij = (i1 - i0) * nao
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0 * (i0 + 1) // 2:i1 *
                               (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii),
                                        dtype=v.dtype,
                                        buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                              (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d  blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1, None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot(
                    j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)

    _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap,
              log)
Exemplo n.º 20
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.mpi_prange(0, ngrids):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            aoaux = fuse(aoaux)
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2cR   = lib.dot(LkR*coulG[p0:p1], LkR.T)
                j2c_k += lib.dot(LkI*coulG[p0:p1], LkI.T, 1, j2cR, 1)
            else:
                # aoaux ~ kpt_ij, aoaux.conj() ~ kpt_kl
                j2cR, j2cI = zdotCN(LkR*coulG[p0:p1], LkI*coulG[p0:p1], LkR.T, LkI.T)
                j2c_k += j2cR + j2cI * 1j
            LkR = LkI = None
        j2c[k] -= mpi.allreduce(j2c_k)

        try:
            fswap['j2c/%d'%k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError:
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns',
                       w[0]/w[-1], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            fswap['j2c/%d'%k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d'%k] = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = v2 = None
            j2ctags.append('eig')
        aoaux = kLR = kLI = j2cR = j2cI = coulG = None
    j2c = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis-kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

# Estimates the buffer size based on the last contraction in G-space.
# This contraction requires to hold nkptj copies of (naux,?) array
# simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse==x).sum() for x in set(uniq_inverse))
    buflen = max(int(min(max_memory*.5e6/16/naux/(nkptj_max+2)/nao,
                         nao/3/mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = mpi_df.grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s',
               max_memory, mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr(fused_cell.cart)

    def gen_int3c(job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1*(i1+1)//2 - i0*(i0+1)//2
        dij = (i1 - i0) * nao
        if j_only:
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dii+dii)))
        else:
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dij+dij)))
        auxranges = balance_segs(aux_loc[1:]-aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij*dij*buflen, dtype=dtype)
        buf1 = numpy.empty(dij*buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            if j_only:
                mat = numpy.ndarray((nkptij,dii,nrow), dtype=dtype, buffer=buf)
            else:
                mat = numpy.ndarray((nkptij,dij,nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d'%(dataname,k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0*(i0+1)//2:i1*(i1+1)//2] - i0 * nao
                    out = numpy.ndarray((nrow,dii), dtype=v.dtype, buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0+nrow] = v.real
                else:
                    h5dat[naux0:naux0+nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        Gaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        Gaux = fuse(Gaux)
        Gaux *= mydf.weighted_coulG(kpt, False, mesh)
        kLR = lib.transpose(numpy.asarray(Gaux.real, order='C'))
        kLI = lib.transpose(numpy.asarray(Gaux.imag, order='C'))
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = fuse(numpy.asarray(fswap[key]))
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol*Gblksize)
        pqkIbuf = numpy.empty(ncol*Gblksize)
        buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128)
        log.alldebug2('    blksize (%d,%d)', Gblksize, ncol)

        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                                        gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = j3cR[k]
            else:
                v = j3cR[k] + j3cI[k] * 1j
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)

    mpi_df._assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)