Python gemmdot примеры использования

Язык программирования: Python

Пространство имен/Пакет: gpaw.utilities.blas

Метод/Функция: gemmdot

Примеров на hotexamples.com: 35

Python gemmdot - 35 примеров найдено. Это лучшие примеры Python кода для gpaw.utilities.blas.gemmdot, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: base.py Проект: yihsuanliu/gpaw

    def get_phi_aGp(self):
        """Build the per-atom ``phi_aGp`` arrays and store them on ``self``.

        For every atom index the array returned by
        ``two_phi_planewave_integrals`` is multiplied, row by row, with
        ``exp(-2j * pi * dot(q_c + Gvec_Gc[iG], spos_ac[a]))``.  When
        ``self.optical_limit`` is true, row 0 (the G == 0 entry) is replaced
        by ``-1j * dot(nabla_iiv, qq_v)`` flattened.  The dictionary is
        stored as ``self.phi_aGp``; nothing is returned.
        """
        wfs_setups = self.calc.wfs.setups
        scaled_pos_ac = self.calc.atoms.get_scaled_positions()

        # Product of (q + G) with a copy of bcell_cv, evaluated by gemmdot.
        qG_Gv = gemmdot(self.q_c + self.Gvec_Gc,
                        self.bcell_cv.copy(),
                        beta=0.0)

        result = {}
        for atom, _ in enumerate(wfs_setups.id_a):
            phi_Gp = two_phi_planewave_integrals(qG_Gv, wfs_setups[atom])
            result[atom] = phi_Gp
            for g in range(self.npw):
                # Phase factor from the scaled position of this atom.
                phi_Gp[g] *= np.exp(
                    -1j * 2. * pi *
                    np.dot(self.q_c + self.Gvec_Gc[g], scaled_pos_ac[atom]))

        if self.optical_limit:
            # For the optical limit the G == 0 part is replaced.
            for atom, _ in enumerate(wfs_setups.id_a):
                grad_iiv = wfs_setups[atom].nabla_iiv
                result[atom][0] = -1j * np.dot(grad_iiv, self.qq_v).ravel()

        self.phi_aGp = result
        self.printtxt('')
        self.printtxt('Finished phi_Gp !')

Пример #2

Показать файл

    def get_phi_aGp(self, q_c=None, parallel=True, alldir=False):
        """Return the per-atom ``phi_aGp`` dictionary for a wave vector.

        Parameters:

        q_c:
            Wave vector.  ``None`` selects ``self.q_c`` together with
            ``self.qq_v`` and ``self.optical_limit``.  An explicit vector
            whose absolute components sum to less than 1e-8 is replaced by
            ``[0.0001, 0, 0]`` and treated as the optical limit.
        parallel:
            When true, only the G range handed out by
            ``parallel_partition`` is phase-multiplied here and every
            per-atom array is passed to ``self.comm.sum``.
        alldir:
            When true, the tuple ``(phi_aGp, phiG0_avp)`` is returned
            instead of ``phi_aGp`` alone.  ``phiG0_avp`` is only filled in
            the optical limit and is an empty dict otherwise.
        """
        if q_c is not None:
            optical_limit = False
            if np.abs(q_c).sum() < 1e-8:
                q_c = np.array([0.0001, 0, 0])
                optical_limit = True
            qq_v = np.dot(q_c, self.bcell_cv)
        else:
            q_c = self.q_c
            qq_v = self.qq_v
            optical_limit = self.optical_limit

        setups = self.calc.wfs.setups
        spos_ac = self.calc.atoms.get_scaled_positions()

        kk_Gv = gemmdot(q_c + self.Gvec_Gc, self.bcell_cv.copy(), beta=0.0)
        phi_aGp = {}
        phiG0_avp = {}

        if parallel:
            from gpaw.response.parallel import parallel_partition

            # Only the start/end of the local G range are used below.
            _, _, Gstart, Gend = parallel_partition(self.npw,
                                                    self.comm.rank,
                                                    self.comm.size,
                                                    reshape=False)
        else:
            Gstart, Gend = 0, self.npw

        for atom, _ in enumerate(setups.id_a):
            phi_Gp = two_phi_planewave_integrals(kk_Gv, setups[atom],
                                                 Gstart, Gend)
            phi_aGp[atom] = phi_Gp
            for g in range(Gstart, Gend):
                phi_Gp[g] *= np.exp(
                    -1j * 2. * pi * np.dot(q_c + self.Gvec_Gc[g],
                                           spos_ac[atom]))
            if parallel:
                self.comm.sum(phi_Gp)

        if optical_limit:
            # For the optical limit the G == 0 part is replaced.
            unit_cc = np.diag((1, 1, 1))
            for atom, _ in enumerate(setups.id_a):
                grad_iiv = setups[atom].nabla_iiv
                phi_aGp[atom][0] = -1j * np.dot(grad_iiv, qq_v).ravel()

                phiG0_vp = np.zeros((3, len(phi_aGp[atom][0])), complex)
                phiG0_avp[atom] = phiG0_vp
                for axis in range(3):  # three directions
                    q2_c = unit_cc[axis] * self.qopt
                    qq2_v = np.dot(q2_c, self.bcell_cv)  # summation over c
                    phiG0_vp[axis] = -1j * np.dot(grad_iiv, qq2_v).ravel()

        return (phi_aGp, phiG0_avp) if alldir else phi_aGp

Пример #3

Показать файл

    def calculate_local_kernel(self):
        """Compute the local ALDA kernel for every q-point and write it out.

        Standard ALDA exchange kernel.  Use with care: results are very
        difficult to converge and are sensitive to ``density_cut``.

        For each index in ``self.ibzq_qc`` a matrix ``fhxc_sGsG`` is
        assembled from grid integrals of ``exp(-1j * dG . r) * fxc``.  The
        rows are distributed over the ranks of ``mpi.world`` and summed
        afterwards.  The matrix is divided by the cell volume and
        ``4 * pi / G2`` is added on the diagonal of every ``npw x npw``
        spin block.  Rank 0 then writes the matrix to
        ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw``.

        NOTE(review): when all components of a q-point are zero, element 0
        of ``self.pd.G2_qG[iq]`` is overwritten with 1.0; presumably this
        modifies the array stored on ``self.pd`` in place - confirm.
        """
        ns = self.calc.wfs.nspins
        gd = self.gd
        pd = self.pd
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        fxc_sg = ns * self.get_fxc_g(ns * self.n_g)
        # Zero the kernel wherever the density is below the cutoff.
        fxc_sg[np.where(self.n_g < self.density_cut)] = 0.0

        r_vg = gd.get_grid_point_coordinates()

        for iq in range(len(self.ibzq_qc)):
            Gvec_Gc = np.dot(pd.get_reciprocal_vectors(q=iq, add_q=False),
                             cell_cv / (2 * np.pi))
            npw = len(Gvec_Gc)
            # Ceiling division: rows per rank, last rank may get fewer.
            l_pw_size = -(-npw // mpi.world.size)
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), dtype=complex)
            for s in range(ns):
                # Loop invariant: the integrand only reads the kernel of
                # spin s, so one reference per spin replaces the fresh
                # copy that used to be made for every (iG, jG) pair.
                fxc_g = fxc_sg[s]
                for iG in l_pw_range:
                    for jG in range(npw):
                        dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                        dG_v = np.dot(dG_c, icell_cv)
                        dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                        ft_fxc = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)
                        fhxc_sGsG[s * npw + iG, s * npw + jG] = ft_fxc

            # Collect the rows computed on the other ranks.
            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            Gq2_G = self.pd.G2_qG[iq]
            if (self.ibzq_qc[iq] == 0).all():
                # Avoid dividing by the first element for a zero q-point.
                Gq2_G[0] = 1.
            vq_G = 4 * np.pi / Gq2_G
            fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)

Пример #4

Показать файл

Файл: fxc.py Проект: ryancoleman/lotsofcoresbook2code

    def calculate_local_kernel(self):
        """Compute the local ALDA kernel for every q-point and write it out.

        Standard ALDA exchange kernel.  Use with care: results are very
        difficult to converge and are sensitive to ``density_cut``.

        For each index in ``self.ibzq_qc`` a matrix ``fhxc_sGsG`` is built
        from grid integrals of ``exp(-1j * dG . r) * fxc``.  Each rank of
        ``mpi.world`` fills its own slice of rows and the slices are summed
        afterwards.  The result is divided by the cell volume,
        ``4 * pi / G2`` is added on the diagonal of every ``npw x npw``
        spin block, and rank 0 writes the matrix to
        ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw``.

        NOTE(review): when all components of a q-point are zero, element 0
        of ``self.pd.G2_qG[iq]`` is overwritten with 1.0; presumably this
        modifies the array stored on ``self.pd`` in place - confirm.
        """
        ns = self.calc.wfs.nspins
        gd = self.gd
        pd = self.pd
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        fxc_sg = ns * self.get_fxc_g(ns * self.n_g)
        # Zero the kernel wherever the density is below the cutoff.
        fxc_sg[np.where(self.n_g < self.density_cut)] = 0.0

        r_vg = gd.get_grid_point_coordinates()

        for iq in range(len(self.ibzq_qc)):
            Gvec_Gc = np.dot(pd.get_reciprocal_vectors(q=iq, add_q=False),
                             cell_cv / (2 * np.pi))
            npw = len(Gvec_Gc)
            # Ceiling division: rows per rank, last rank may get fewer.
            l_pw_size = -(-npw // mpi.world.size)
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), dtype=complex)
            for s in range(ns):
                # The kernel of spin s is only read inside the loops, so it
                # is looked up once here instead of being copied for every
                # (iG, jG) pair.
                fxc_g = fxc_sg[s]
                for iG in l_pw_range:
                    for jG in range(npw):
                        dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                        dG_v = np.dot(dG_c, icell_cv)
                        dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                        ft_fxc = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)
                        fhxc_sGsG[s * npw + iG, s * npw + jG] = ft_fxc

            # Collect the rows computed on the other ranks.
            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            Gq2_G = self.pd.G2_qG[iq]
            if (self.ibzq_qc[iq] == 0).all():
                # Avoid dividing by the first element for a zero q-point.
                Gq2_G[0] = 1.
            vq_G = 4 * np.pi / Gq2_G
            fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)

Пример #5

Показать файл

Файл: base.py Проект: ryancoleman/lotsofcoresbook2code

    def get_phi_aGp(self, q_c=None, parallel=True, alldir=False):
        """Return the per-atom ``phi_aGp`` dictionary for a wave vector.

        Parameters:

        q_c:
            Wave vector.  With ``None`` the values ``self.q_c``,
            ``self.qq_v`` and ``self.optical_limit`` are used.  An explicit
            vector whose absolute components sum to less than 1e-8 is
            replaced by ``[0.0001, 0, 0]`` and treated as the optical limit.
        parallel:
            When true, the G range comes from ``parallel_partition`` and
            every per-atom array is passed to ``self.comm.sum``.
        alldir:
            When true, ``(phi_aGp, phiG0_avp)`` is returned; otherwise only
            ``phi_aGp``.  ``phiG0_avp`` stays empty outside the optical
            limit.
        """
        if q_c is not None:
            optical_limit = False
            if np.abs(q_c).sum() < 1e-8:
                q_c = np.array([0.0001, 0, 0])
                optical_limit = True
            qq_v = np.dot(q_c, self.bcell_cv)
        else:
            q_c = self.q_c
            qq_v = self.qq_v
            optical_limit = self.optical_limit

        setups = self.calc.wfs.setups
        spos_ac = self.calc.atoms.get_scaled_positions()

        kk_Gv = gemmdot(q_c + self.Gvec_Gc, self.bcell_cv.copy(), beta=0.0)
        phi_aGp = {}
        phiG0_avp = {}

        if parallel:
            from gpaw.response.parallel import parallel_partition

            # Only the bounds of the local G range are needed here.
            _, _, Gstart, Gend = parallel_partition(
                self.npw, self.comm.rank, self.comm.size, reshape=False)
        else:
            Gstart, Gend = 0, self.npw

        for atom, _ in enumerate(setups.id_a):
            phi_Gp = two_phi_planewave_integrals(kk_Gv, setups[atom],
                                                 Gstart, Gend)
            phi_aGp[atom] = phi_Gp
            for g in range(Gstart, Gend):
                phi_Gp[g] *= np.exp(
                    -1j * 2. * pi * np.dot(q_c + self.Gvec_Gc[g],
                                           spos_ac[atom]))
            if parallel:
                self.comm.sum(phi_Gp)

        if optical_limit:
            # For the optical limit the G == 0 part is replaced.
            unit_cc = np.diag((1, 1, 1))
            for atom, _ in enumerate(setups.id_a):
                grad_iiv = setups[atom].nabla_iiv
                phi_aGp[atom][0] = -1j * np.dot(grad_iiv, qq_v).ravel()

                phiG0_vp = np.zeros((3, len(phi_aGp[atom][0])), complex)
                phiG0_avp[atom] = phiG0_vp
                for axis in range(3):  # three directions
                    q2_c = unit_cc[axis] * self.qopt
                    qq2_v = np.dot(q2_c, self.bcell_cv)  # summation over c
                    phiG0_vp[axis] = -1j * np.dot(grad_iiv, qq2_v).ravel()

        if not alldir:
            return phi_aGp
        return phi_aGp, phiG0_avp

Пример #6

Показать файл

Файл: math_func.py Проект: qsnake/gpaw

def full_hilbert_transform(specfunc_wGG, Nw, dw, eta):
    """Multiply ``specfunc_wGG`` with the full Hilbert-transform weights.

    The weight matrix has entries
    ``1 / (w - w' - 1j*eta) - 1 / (w + w' + 1j*eta)`` with ``w = iw * dw``
    for ``iw < Nw`` and ``w' = jw * dw`` for
    ``jw < specfunc_wGG.shape[0]``.  It is applied to ``specfunc_wGG`` with
    ``gemmdot`` and the product is scaled by ``dw``.
    """
    n_spec = specfunc_wGG.shape[0]
    weights_ww = np.zeros((Nw, n_spec), dtype=complex)
    spec_freqs = [jw * dw for jw in range(n_spec)]

    for iw in range(Nw):
        omega = iw * dw
        for jw, omega_p in enumerate(spec_freqs):
            first = 1. / (omega - omega_p - 1j*eta)
            second = 1. / (omega + omega_p + 1j*eta)
            weights_ww[iw, jw] = first - second

    return gemmdot(weights_ww, specfunc_wGG, beta = 0.) * dw

Пример #7

Показать файл

Файл: math_func.py Проект: yihsuanliu/gpaw

def full_hilbert_transform(specfunc_wGG, Nw, dw, eta):
    """Apply the full Hilbert-transform weight matrix to ``specfunc_wGG``.

    Entry ``(iw, jw)`` of the weight matrix is
    ``1 / (w - w' - 1j*eta) - 1 / (w + w' + 1j*eta)`` where ``w = iw * dw``
    and ``w' = jw * dw``; ``iw`` runs over ``Nw`` values and ``jw`` over the
    first dimension of ``specfunc_wGG``.  The ``gemmdot`` product of the
    weights with ``specfunc_wGG`` is returned, multiplied by ``dw``.
    """
    n_spec = specfunc_wGG.shape[0]
    weights_ww = np.zeros((Nw, n_spec), dtype=complex)
    spec_freqs = [jw * dw for jw in range(n_spec)]

    for iw in range(Nw):
        omega = iw * dw
        for jw, omega_p in enumerate(spec_freqs):
            first = 1. / (omega - omega_p - 1j * eta)
            second = 1. / (omega + omega_p + 1j * eta)
            weights_ww[iw, jw] = first - second

    return gemmdot(weights_ww, specfunc_wGG, beta=0.) * dw

Пример #8

Показать файл

Файл: math_func.py Проект: robwarm/gpaw-symm

def hilbert_transform(specfunc_wGG, w_w, Nw, dw, eta, fullresponse=False):
    """Multiply ``specfunc_wGG`` with Hilbert-transform weights.

    Row ``iw`` of the weight matrix is
    ``1 / (w_w[iw] - w' + 1j*eta) - 1 / (w_w[iw] + w' + 1j*eta)`` when
    ``fullresponse`` is exactly ``False``; otherwise the second term uses
    ``- 1j*eta``.  ``w'`` runs over ``specfunc_wGG.shape[0]`` evenly spaced
    values starting at 0 with step ``dw``.  The ``gemmdot`` product of the
    weights with ``specfunc_wGG`` is returned, multiplied by ``dw``.
    """
    n_spec = specfunc_wGG.shape[0]
    weights_ww = np.zeros((Nw, n_spec), dtype=complex)
    grid_w = np.linspace(0., (n_spec-1)*dw, n_spec)

    # Identity test on purpose: only the literal False selects this branch.
    same_sign = fullresponse is False

    for iw in range(Nw):
        omega = w_w[iw]
        first = 1. / (omega - grid_w + 1j*eta)
        if same_sign:
            second = 1. / (omega + grid_w + 1j*eta)
        else:
            second = 1. / (omega + grid_w - 1j*eta)
        weights_ww[iw] = first - second

    return gemmdot(weights_ww, specfunc_wGG, beta = 0.) * dw

Пример #9

Показать файл

def hilbert_transform(specfunc_wGG, w_w, Nw, dw, eta, fullresponse=False):
    """Apply Hilbert-transform weights to ``specfunc_wGG``.

    For each of the first ``Nw`` entries of ``w_w`` a row
    ``1 / (w - w' + 1j*eta) - 1 / (w + w' + 1j*eta)`` is built; if
    ``fullresponse`` is anything other than the object ``False`` the second
    term uses ``- 1j*eta`` instead.  ``w'`` is an evenly spaced grid of
    ``specfunc_wGG.shape[0]`` points from 0 with spacing ``dw``.  Returns
    the ``gemmdot`` product of the weights with ``specfunc_wGG`` times
    ``dw``.
    """
    n_spec = specfunc_wGG.shape[0]
    weights_ww = np.zeros((Nw, n_spec), dtype=complex)
    grid_w = np.linspace(0., (n_spec - 1) * dw, n_spec)

    # Identity test on purpose: only the literal False selects this branch.
    same_sign = fullresponse is False

    for iw in range(Nw):
        omega = w_w[iw]
        first = 1. / (omega - grid_w + 1j * eta)
        if same_sign:
            second = 1. / (omega + grid_w + 1j * eta)
        else:
            second = 1. / (omega + grid_w - 1j * eta)
        weights_ww[iw] = first - second

    return gemmdot(weights_ww, specfunc_wGG, beta=0.) * dw

Пример #10

Показать файл

Файл: dos.py Проект: thonmaker/gpaw

def get_all_electron_IPR(paw):
    """Print a table with one ``ipr / norm**2`` value per k-point and band.

    For every entry of ``paw.wfs.kpt_u`` and every band, the orbital density
    is interpolated to the fine grid.  ``norm`` is its integral and ``ipr``
    the integral of its square.  Both then receive per-atom corrections
    built from the radial densities of each setup's ``xc_correction``.
    Nothing is returned; the result is printed.
    """
    density = paw.density
    wfs = paw.wfs
    n_G = wfs.gd.empty()
    n_g = density.finegd.empty()
    rule = '-' * 35

    print()
    print('inverse participation function')
    print(rule)
    print('%5s %5s %10s %10s' % ('k', 'band', 'eps', 'ipr'))
    print(rule)
    for k, kpt in enumerate(wfs.kpt_u):
        for n, (eps, psit_G) in enumerate(zip(kpt.eps_n, kpt.psit_nG)):
            # Pseudo orbital density on the coarse grid, then the fine grid.
            n_G[:] = 0.0
            wfs.add_orbital_density(n_G, kpt, n)
            density.interpolator.apply(n_G, n_g)
            norm = density.finegd.integrate(n_g)
            # Rebinds n_g to a new array holding the squared density.
            n_g = n_g**2
            ipr = density.finegd.integrate(n_g)
            for a in kpt.P_ani:
                xccorr = density.setups[a].xc_correction

                # Orbital density matrix of atom a for this band.
                D_sp = np.array(wfs.get_orbital_density_matrix(a, kpt, n))

                # Contract the density matrix with B_Lqp.
                D_sLq = gemmdot(D_sp, xccorr.B_Lqp, trans='t')

                # Iterators over all-electron and pseudo radial densities.
                ae_iter = xccorr.expand_density(D_sLq, xccorr.n_qg, None)
                ps_iter = xccorr.expand_density(D_sLq, xccorr.nt_qg, None)

                # Add the weighted difference between the all-electron and
                # pseudo contributions.
                for n_sg, nt_sg, integrator in zip(
                        ae_iter, ps_iter, xccorr.get_integrator(None)):
                    weight = integrator.weight
                    ipr += weight * np.sum(
                        (n_sg[0]**2 - nt_sg[0]**2) * xccorr.rgd.dv_g)
                    norm += weight * np.sum(
                        (n_sg[0] - nt_sg[0]) * xccorr.rgd.dv_g)

            print('%5i %5i %10.5f %10.5f' % (k, n, eps, ipr / norm**2))
    print(rule)

Пример #11

Показать файл

Файл: dos.py Проект: robwarm/gpaw-symm

def get_all_electron_IPR(paw):
    """Print a table with one ``ipr/norm**2`` value per k-point and band.

    For every entry of ``paw.wfs.kpt_u`` and every band the orbital density
    is interpolated to the fine grid; ``norm`` is its integral and ``ipr``
    the integral of its square.  Both are then corrected atom by atom with
    terms built from each setup's ``xc_correction``.  Nothing is returned.

    NOTE: this snippet uses Python 2 syntax (print statements, ``izip``).
    """
    density = paw.density
    wfs = paw.wfs
    n_G = wfs.gd.empty()
    n_g = density.finegd.empty()
    print
    print "inverse participation function"
    print "-"*35
    print "%5s %5s %10s %10s" % ("k","band","eps","ipr")
    print "-"*35
    for k, kpt in enumerate(paw.wfs.kpt_u):
        for n, (eps, psit_G)  in enumerate(zip(kpt.eps_n, kpt.psit_nG)):
            # Pseudo orbital density on the coarse grid, then the fine grid.
            n_G[:] = 0.0
            wfs.add_orbital_density(n_G, kpt, n)
            density.interpolator.apply(n_G, n_g)
            norm = density.finegd.integrate(n_g)
            # Rebinds n_g to a new array holding the squared density.
            n_g = n_g ** 2
            ipr = density.finegd.integrate(n_g)
            for a in kpt.P_ani:
                # Get xccorr for atom a
                setup = paw.density.setups[a]
                xccorr = setup.xc_correction

                # Get D_sp for atom a
                D_sp = np.array(wfs.get_orbital_density_matrix(a, kpt, n))

                # Density as a function of L and the partial-wave radial
                # pair density coefficient
                D_sLq = gemmdot(D_sp, xccorr.B_Lqp, trans='t')

                # Create pseudo/ae density iterators for integration
                n_iter = xccorr.expand_density(D_sLq, xccorr.n_qg, None)
                nt_iter = xccorr.expand_density(D_sLq, xccorr.nt_qg, None)

                # Accumulate the weighted difference between the
                # all-electron and pseudo radial contributions
                for n_sg, nt_sg, integrator in izip(n_iter,
                                                    nt_iter,
                                                    xccorr.get_integrator(None)):
                    ipr += integrator.weight * np.sum((n_sg[0]**2-nt_sg[0]**2) * xccorr.rgd.dv_g)
                    norm += integrator.weight * np.sum((n_sg[0]-nt_sg[0]) * xccorr.rgd.dv_g)

            print "%5i %5i %10.5f %10.5f" % (k, n, eps, ipr/norm**2)
    print "-"*35

Пример #12

Показать файл

Файл: base.py Проект: qsnake/gpaw

    def get_phi_aGp(self):
        """Build the per-atom ``phi_aGp`` arrays and store them on ``self``.

        Each array returned by ``two_phi_planewave_integrals`` is
        multiplied, row by row, with
        ``exp(-2j * pi * dot(q_c + Gvec_Gc[iG], spos_ac[a]))``.  When
        ``self.optical_limit`` is true, row 0 (the G == 0 entry) is replaced
        by ``-1j * dot(nabla_iiv, qq_v)`` flattened.  The dictionary ends up
        in ``self.phi_aGp``; nothing is returned.
        """
        wfs_setups = self.calc.wfs.setups
        scaled_pos_ac = self.calc.atoms.get_scaled_positions()

        # Product of (q + G) with a copy of bcell_cv, evaluated by gemmdot.
        qG_Gv = gemmdot(self.q_c + self.Gvec_Gc, self.bcell_cv.copy(),
                        beta=0.0)

        result = {}
        for atom, _ in enumerate(wfs_setups.id_a):
            phi_Gp = two_phi_planewave_integrals(qG_Gv, wfs_setups[atom])
            result[atom] = phi_Gp
            for g in range(self.npw):
                # Phase factor from the scaled position of this atom.
                phi_Gp[g] *= np.exp(
                    -1j * 2. * pi *
                    np.dot(self.q_c + self.Gvec_Gc[g], scaled_pos_ac[atom]))

        if self.optical_limit:
            # For the optical limit the G == 0 part is replaced.
            for atom, _ in enumerate(wfs_setups.id_a):
                grad_iiv = wfs_setups[atom].nabla_iiv
                result[atom][0] = -1j * np.dot(grad_iiv, self.qq_v).ravel()

        self.phi_aGp = result
        self.printtxt('')
        self.printtxt('Finished phi_Gp !')

Пример #13

Показать файл

Файл: kernel.py Проект: yihsuanliu/gpaw

def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol, bcell_cv, R_av, setups,
                  D_asp):
    """Return the LDA kernel matrix ``Kxc_GG`` divided by ``vol``.

    The result is the sum of a "soft" part, computed from ``nt_sG`` on the
    grid ``gd``, and a PAW part accumulated over the atoms in ``setups``
    and summed over ``world``.

    Only the first spin channel is used (``fxc_sg[0]``), and the PAW part
    asserts that ``D_asp[a]`` has exactly one spin.

    NOTE(review): ``R_nv``, ``weight_n``, ``rank``, ``size`` and ``world``
    are not parameters; presumably they are module-level names defined
    outside this snippet - confirm.
    """

    # The soft part
    # The grid shape of the density must equal nG.
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    xc = XC('LDA')

    fxc_sg = np.zeros_like(nt_sG)
    xc.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_g = np.fft.fftn(fxc_g) * vol / nG0

    r_vg = gd.get_grid_point_coordinates()

    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            # Use the FFT value when every component of dG_c is strictly
            # smaller in magnitude than nG / 2.
            if (nG / 2 - np.abs(dG_c) > 0).all():
                # Wrap negative components into the range [0, nG).
                index = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = tmp_g[index[0], index[1], index[2]]
            else:  # not in the fft index
                # Fall back to an explicit grid integration.
                dG_v = np.dot(dG_c, bcell_cv)
                dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j * dGr_g) * fxc_g)

    KxcPAW_GG = np.zeros_like(Kxc_GG)
    # The PAW part
    # dG_GGv[iG, jG] holds (G_i - G_j) multiplied with bcell_cv.
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        # Atoms are distributed round-robin over the ranks.
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)
            assert nspins == 1

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)
            # Add core density
            n_sLg[:, 0] += sqrt(4 * pi) / nspins * nc_g
            nt_sLg[:, 0] += sqrt(4 * pi) / nspins * nct_g

            # Phase factor exp(-1j * dG . R_a) for this atom.
            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))

            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                # All-electron kernel on the radial grid.
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                xc.calculate_fxc(rgd, n_sg, f_sg)

                # Pseudo kernel on the radial grid.
                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                xc.calculate_fxc(rgd, nt_sg, ft_sg)

                # exp(-1j * (dG . R_nv[n]) * r) for every radial point.
                coef_GGg = np.exp(
                    -1j *
                    np.outer(np.inner(dG_GGv, R_nv[n]), rgd.r_g)).reshape(
                        npw, npw, rgd.ng)
                KxcPAW_GG += w * np.dot(
                    coef_GGg, (f_sg[0] - ft_sg[0]) * dv_g) * coefatoms_GG
    # Combine the atomic contributions from all ranks.
    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG

    return Kxc_GG / vol

Пример #14

Показать файл

Файл: berryphase.py Проект: thonmaker/gpaw

def parallel_transport(calc,
                       direction=0,
                       spinors=True,
                       name=None,
                       scale=1.0,
                       bands=None,
                       theta=0.0,
                       phi=0.0):
    """Compute phases along k-point strings and save them to an npz file.

    For every string of k-points running along ``direction`` the selected
    bands are rotated from one k-point to the next using the SVD of their
    overlap matrix.  The phases of the eigenvalues of the final overlap
    matrix are stored in ``phi_km``, and a diagonal expectation value built
    from the even/odd rows of the spin-orbit eigenvectors in ``S_km``.
    Both arrays are summed over ``world`` and written to
    ``'phases_%s.npz' % name``.  Nothing is returned.

    Parameters:

    calc:
        Calculator object, or a string that is passed to
        ``GPAW(calc, txt=None, communicator=serial_comm)``.
    direction:
        Index (0, 1 or 2) of the k-grid axis along which the strings run.
    spinors:
        When true the grid size is doubled and ``dO_ii`` is expanded to a
        matrix of twice the size.
    name:
        Inserted into the output file name.
    scale, theta, phi:
        Forwarded to ``get_spinorbit_eigenvalues``.
    bands:
        Band indices to use; defaults to
        ``range(int(calc.get_number_of_electrons()))``.

    NOTE(review): ``rank`` and ``world`` are not defined in this snippet;
    presumably they are module-level MPI names - confirm.
    """

    if isinstance(calc, str):
        calc = GPAW(calc, txt=None, communicator=serial_comm)

    if bands is None:
        nv = int(calc.get_number_of_electrons())
        bands = range(nv)

    cell_cv = calc.wfs.gd.cell_cv
    icell_cv = (2 * np.pi) * np.linalg.inv(cell_cv).T
    r_g = calc.wfs.gd.get_grid_point_coordinates()
    # Number of grid points, doubled when spinors is true.
    Ng = np.prod(np.shape(r_g)[1:]) * (spinors + 1)

    dO_aii = []
    for ia in calc.wfs.kpt_u[0].P_ani.keys():
        dO_ii = calc.wfs.setups[ia].dO_ii
        if spinors:
            # Spinor projections require doubling of the (identical) orbitals
            dO_jj = np.zeros((2 * len(dO_ii), 2 * len(dO_ii)), complex)
            dO_jj[::2, ::2] = dO_ii
            dO_jj[1::2, 1::2] = dO_ii
            dO_aii.append(dO_jj)
        else:
            dO_aii.append(dO_ii)

    N_c = calc.wfs.kd.N_c
    # At least one of the two other k-grid dimensions must equal 1.
    assert 1 in np.delete(N_c, direction)
    Nkx = N_c[0]
    Nky = N_c[1]
    Nkz = N_c[2]

    Nk = Nkx * Nky * Nkz
    Nloc = N_c[direction]
    Npar = Nk // Nloc

    # Parallelization stuff
    # Ceiling division: strings per rank; the last rank may get fewer.
    myKsize = -(-Npar // (world.size))
    myKrange = range(rank * myKsize, min((rank + 1) * myKsize, Npar))
    myKsize = len(myKrange)

    # Get array of k-point indices of the path. q index is loc direction
    kpts_kq = []
    for k in range(Npar):
        if direction == 0:
            kpts_kq.append(list(range(k, Nkx * Nky, Nky)))
        if direction == 1:
            if Nkz == 1:
                kpts_kq.append(list(range(k * Nky, (k + 1) * Nky)))
            else:
                kpts_kq.append(list(range(k, Nkz * Nky, Nkz)))
        if direction == 2:
            kpts_kq.append(list(range(k * Nloc, (k + 1) * Nloc)))

    # Unit vector along `direction`, multiplied with icell_cv.
    G_c = np.array([0, 0, 0])
    G_c[direction] = 1
    G_v = np.dot(G_c, icell_cv)

    kpts_kc = calc.get_bz_k_points()
    kpts_kv = np.dot(kpts_kc, icell_cv)
    # b_v is the step between neighbouring k-points of a string, or G_v
    # when the string has a single point.
    if Nloc > 1:
        b_c = kpts_kc[kpts_kq[0][1]] - kpts_kc[kpts_kq[0][0]]
        b_v = np.dot(b_c, icell_cv)
    else:
        b_v = G_v

    e_mk, v_knm = get_spinorbit_eigenvalues(calc,
                                            return_wfs=True,
                                            scale=scale,
                                            theta=theta,
                                            phi=phi)

    phi_km = np.zeros((Npar, len(bands)), float)
    S_km = np.zeros((Npar, len(bands)), float)
    # Loop over the direction parallel components
    for k in myKrange:
        U_qmm = [np.eye(len(bands))]
        print(k)
        qpts_q = kpts_kq[k]
        # Loop over kpoints in the phase direction
        for q in range(Nloc - 1):
            iq1 = qpts_q[q]
            iq2 = qpts_q[q + 1]
            # print(kpts_kc[iq1], kpts_kc[iq2])
            if q == 0:
                u1_nsG = get_spinorbit_wavefunctions(calc, iq1,
                                                     v_knm[iq1])[bands]
                # Transform from psi-like to u-like
                u1_nsG[:] *= np.exp(-1.0j *
                                    gemmdot(kpts_kv[iq1], r_g, beta=0.0))
                P1_ani = get_spinorbit_projections(calc, iq1, v_knm[iq1])

            u2_nsG = get_spinorbit_wavefunctions(calc, iq2, v_knm[iq2])[bands]
            u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq2], r_g, beta=0.0))
            P2_ani = get_spinorbit_projections(calc, iq2, v_knm[iq2])

            M_mm = get_overlap(calc, bands,
                               np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                               np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                               P2_ani, dO_aii, b_v)
            # Rotation matrix built from the SVD factors of the overlap.
            V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
            U_mm = np.dot(V_mm, W_mm).conj()
            # Apply U_mm to the band axis; the swapaxes calls move the band
            # axis into place for np.dot and restore the axis order after.
            u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
            u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
            u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
            u2_nsG = u_nsxyz
            for a in range(len(calc.atoms)):
                P2_ni = P2_ani[a][bands]
                P2_ni = np.dot(U_mm, P2_ni)
                P2_ani[a][bands] = P2_ni
            U_qmm.append(U_mm)
            # The rotated states become the reference for the next step.
            u1_nsG = u2_nsG
            P1_ani = P2_ani
        U_qmm = np.array(U_qmm)

        # Fix phases for last point
        iq0 = qpts_q[0]
        if Nloc == 1:
            # The loop above did not run, so u1/P1 are set up here.
            u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
            u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
            P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        u2_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        u2_nsG[:] *= np.exp(-1.0j * gemmdot(G_v, r_g, beta=0.0))
        P2_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        for a in range(len(calc.atoms)):
            # With the phase line commented out, this loop reads each entry
            # and writes it back unchanged.
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(-1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        M_mm = get_overlap(calc, bands, np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                           P2_ani, dO_aii, b_v)
        V_mm, sing_m, W_mm = np.linalg.svd(M_mm)
        U_mm = np.dot(V_mm, W_mm).conj()
        u_nysxz = np.dot(U_mm, np.swapaxes(u2_nsG, 0, 3))
        u_nxsyz = np.swapaxes(u_nysxz, 1, 3)
        u_nsxyz = np.swapaxes(u_nxsyz, 1, 2)
        u2_nsG = u_nsxyz
        for a in range(len(calc.atoms)):
            P2_ni = P2_ani[a][bands]
            P2_ni = np.dot(U_mm, P2_ni)
            P2_ani[a][bands] = P2_ni

        # Get overlap between first kpts and its smoothly translated image
        u2_nsG[:] *= np.exp(1.0j * gemmdot(G_v, r_g, beta=0.0))
        for a in range(len(calc.atoms)):
            # Same as above: the entries are written back unchanged.
            P2_ni = P2_ani[a][bands]
            # P2_ni *= np.exp(1.0j * np.dot(G_v, r_av[a]))
            P2_ani[a][bands] = P2_ni
        u1_nsG = get_spinorbit_wavefunctions(calc, iq0, v_knm[iq0])[bands]
        u1_nsG[:] *= np.exp(-1.0j * gemmdot(kpts_kv[iq0], r_g, beta=0.0))
        P1_ani = get_spinorbit_projections(calc, iq0, v_knm[iq0])
        M_mm = get_overlap(calc, bands, np.reshape(u1_nsG, (len(u1_nsG), Ng)),
                           np.reshape(u2_nsG, (len(u2_nsG), Ng)), P1_ani,
                           P2_ani, dO_aii, np.array([0.0, 0.0, 0.0]))
        # The phases are the complex angles of the eigenvalues of M_mm.
        l_m, l_mm = np.linalg.eig(M_mm)
        phi_km[k] = np.angle(l_m)
        print(phi_km[k] / 2 / np.pi)

        # Average over the string of (even rows) minus (odd rows) of the
        # rotated eigenvectors.
        A_mm = np.zeros_like(l_mm, complex)
        for q in range(Nloc):
            iq = qpts_q[q]
            U_mm = U_qmm[q]
            v_nm = U_mm.dot(v_knm[iq][:, bands].T).T
            A_mm += np.dot(v_nm[::2].T.conj(), v_nm[::2])
            A_mm -= np.dot(v_nm[1::2].T.conj(), v_nm[1::2])
        A_mm /= Nloc
        S_km[k] = np.diag(l_mm.T.conj().dot(A_mm).dot(l_mm)).real

    # Each rank filled only its own rows; combine them.
    world.sum(phi_km)
    world.sum(S_km)

    # NOTE(review): every rank executes this write - confirm that is
    # intended.
    np.savez('phases_%s.npz' % name, phi_km=phi_km, S_km=S_km)

Пример #15

Показать файл

Файл: base.py Проект: ryancoleman/lotsofcoresbook2code

    def initialize(self):
        """Set up cell, grid, eigenvalues, q-vector data and projectors.

        The steps, in order:

        * divide ``self.eta`` and ``self.ecut`` by ``Hartree`` and
          ``self.ftol`` by the number of BZ k-points,
        * obtain the primitive cell data from ``get_primitive_cell``,
        * create a serial grid descriptor and gradient operators,
        * take eigenvalues and occupations from the calculator unless
          ``self.e_skn`` already exists,
        * optionally shift bands via ``self.add_discontinuity``,
        * prepare ``qq_v``, ``expqr_g`` and ``kq_k`` when ``self.q_c`` is
          set,
        * set up the plane-wave vectors, band counts and projector
          functions, and finally call ``self.print_stuff()``.

        Returns ``None``.
        """
        self.eta /= Hartree
        self.ecut /= Hartree

        calc = self.calc
        self.nspins = self.calc.wfs.nspins

        # kpoint init
        self.kd = kd = calc.wfs.kd
        self.nikpt = kd.nibzkpts
        self.ftol /= kd.nbzkpts

        # cell init
        self.acell_cv = calc.wfs.gd.cell_cv
        self.acell_cv, self.bcell_cv, self.vol, self.BZvol = \
                       get_primitive_cell(self.acell_cv,rpad=self.rpad)

        # grid init
        gd = calc.wfs.gd.new_descriptor(comm=serial_comm)
        self.pbc = gd.pbc_c
        self.gd = gd
        self.nG0 = np.prod(gd.N_c)
        # Number of grid points and volume including zero padding
        self.nGrpad = gd.N_c * self.rpad
        self.nG0rpad = np.prod(self.nGrpad)
        self.d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]

        # obtain eigenvalues, occupations
        nibzkpt = kd.nibzkpts
        kweight_k = kd.weight_k

        self.eFermi = self.calc.occupations.get_fermi_level()

        # Only a missing attribute means "no user eigenvalues"; any other
        # error must propagate instead of silently falling back.
        try:
            self.e_skn
        except AttributeError:
            self.printtxt('Use eigenvalues from the calculator.')
            self.e_skn = {}
            self.f_skn = {}
            for ispin in range(self.nspins):
                self.e_skn[ispin] = np.array([calc.get_eigenvalues(kpt=k, spin=ispin)
                                              for k in range(nibzkpt)]) / Hartree
                self.f_skn[ispin] = np.array([calc.get_occupation_numbers(kpt=k, spin=ispin)
                                              / kweight_k[k]
                                              for k in range(nibzkpt)]) / kd.nbzkpts
        else:
            self.printtxt('Use eigenvalues from user.')

        # Keep an unshifted copy of the eigenvalues.
        self.enoshift_skn = {}
        for ispin in range(self.nspins):
            self.enoshift_skn[ispin] = self.e_skn[ispin].copy()
        if self.eshift is not None:
            self.add_discontinuity(self.eshift)
            self.printtxt('Shift unoccupied bands by %f eV' % (self.eshift))
        # k + q init
        if self.q_c is not None:
            self.qq_v = np.dot(self.q_c, self.bcell_cv) # summation over c

            if self.optical_limit:
                # No phase factor; k + q maps onto k itself.
                kq_k = np.arange(kd.nbzkpts)
                self.expqr_g = 1.
            else:
                r_vg = gd.get_grid_point_coordinates() # (3, nG)
                qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
                self.expqr_g = np.exp(-1j * qr_g)
                del r_vg, qr_g
                kq_k = kd.find_k_plus_q(self.q_c)
            self.kq_k = kq_k

        # Plane wave init
        if self.G_plus_q:
            self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(self.acell_cv,
                                                                 self.bcell_cv,
                                                                 self.gd.N_c,
                                                                 self.ecut,
                                                                 q=self.q_c)
        else:
            self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(self.acell_cv,
                                                                 self.bcell_cv,
                                                                 self.gd.N_c,
                                                                 self.ecut)

        # band init
        if self.nbands is None:
            self.nbands = calc.wfs.bd.nbands
        self.nvalence = calc.wfs.nvalence

        # Projectors init
        setups = calc.wfs.setups
        self.spos_ac = calc.atoms.get_scaled_positions()

        if self.pwmode:
            self.pt = PWLFC([setup.pt_j for setup in setups], self.calc.wfs.pd)
            self.pt.set_positions(self.spos_ac)
        else:
            self.pt = LFC(gd, [setup.pt_j for setup in setups],
                          KPointDescriptor(self.kd.bzk_kc),
                          dtype=complex, forces=True)

            self.pt.set_positions(self.spos_ac)

        # Printing calculation information
        self.print_stuff()

        return

Пример #16

Показать файл

Файл: gemv.py Проект: yihsuanliu/gpaw

# Benchmark fragment (Python 2 syntax).  B_pqL, Y_L, BY0_pq, numreps,
# numflop, beta, P, Q, dtype and test_gemmdot are defined earlier in the
# script, outside this excerpt.

# Timing of plain numpy.dot, repeated numreps times.
t = time.time()
for n in range(numreps):
    BY1_pq = np.dot(B_pqL, Y_L)
t = time.time() - t
performance = numflop * numreps / t
# NOTE(review): the figure labelled "Mflops" is divided by 1024**2, not
# 1e6 - confirm that this is intended.
print 'dot    : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.)
# The result must agree with the reference BY0_pq to within 5e-12.
assert np.abs(BY0_pq - BY1_pq).max() < 5e-12
del BY1_pq

# Timing of gemmdot writing into a preallocated output array that is
# zeroed before every call.
if test_gemmdot:
    BY2_pq = np.empty((P, Q), dtype)
    t = time.time()
    for n in range(numreps):
        BY2_pq.fill(0.0)
        gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
    t = time.time() - t
    performance = numflop * numreps / t
    print 'gemmdot: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.)
    assert np.abs(BY0_pq - BY2_pq).max() < 5e-12
    del BY2_pq

# Timing of gemv called with the 't' flag, again into a zeroed output array.
BY3_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time() - t
performance = numflop * numreps / t
print 'gemvT  : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.)
assert np.abs(BY0_pq - BY3_pq).max() < 5e-12

Пример #17

Показать файл

    def get_self_energy(self, df, W_wGG):
        """Accumulate the self-energy and its frequency derivative.

        Loops over spins, the k-points in ``self.gwkpt_k``, the bands in
        ``self.bands`` and the band range ``[self.m_start, self.m_end)``.
        For every (m, n) pair the pair density ``rho_G`` returned by
        ``df.density_matrix`` is contracted with the interaction matrix.

        Two strategies are available:

        * ``self.hilbert_trans`` false ("method 1"): plasmon-pole
          expression if ``self.ppa``, static expression if ``self.static``,
          otherwise an explicit sum over the frequency grid ``self.w_w``.
        * ``self.hilbert_trans`` true ("method 2"): the frequency transform
          is done once up front and each transition energy is linearly
          interpolated between the two neighbouring grid points.

        Parameters
        ----------
        df :
            Response-function object.  This method reads ``wcomm``, ``kd``,
            ``q_c``, ``kq_k``, ``optical_limit``, ``Nw`` and ``npw`` from it
            and calls ``df.density_matrix``.
        W_wGG :
            Array indexed as ``[w, G, G']``.  When ``self.static`` is true it
            is wrapped so that it gains a leading axis of length one.
            NOTE(review): presumably the screened interaction -- confirm
            against the caller.

        Returns
        -------
        tuple
            ``(Sigma_skn, dSigma_skn)``, two float arrays of shape
            ``(nspins, gwnkpt, gwnband)``, summed over ``self.ncomm``.
        """

        Sigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband),
                             dtype=float)
        dSigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband),
                              dtype=float)

        wcomm = df.wcomm

        if self.static:
            W_wGG = np.array([W_wGG])

        if not self.hilbert_trans:  # method 1
            # Keep copies of the G=0 column and row; they are written back
            # into W_wGG before every use inside the band loops below.
            Wbackup_wG0 = W_wGG[:, :, 0].copy()
            Wbackup_w0G = W_wGG[:, 0, :].copy()

        else:  # method 2, perform Hilbert transform
            nG = np.shape(W_wGG)[1]
            coords = np.zeros(wcomm.size, dtype=int)
            # Split the nG**2 matrix elements over the ranks of wcomm; the
            # last rank takes the remainder.
            nG_local = nG**2 // wcomm.size
            if wcomm.rank == wcomm.size - 1:
                nG_local = nG**2 - (wcomm.size - 1) * nG_local
            wcomm.all_gather(np.array([nG_local]), coords)
            W_Wg = SliceAlongFrequency(W_wGG, coords, wcomm)

            Nw = int(self.w_w[-1] / self.dw)

            w1_ww = np.zeros((Nw, df.Nw), dtype=complex)
            for iw in range(Nw):
                w1 = iw * self.dw
                w1_ww[iw] = 1. / (w1 + self.w_w + 1j * self.eta_w) + 1. / (
                    w1 - self.w_w + 1j * self.eta_w)
                w1_ww[iw, 0] -= 1. / (w1 + 1j * self.eta_w[0])  # correct w'=0
                w1_ww[iw] *= self.dw_w

            # The gemmdot results are bound directly; no zero-filled
            # placeholder arrays are allocated beforehand.
            Cplus_Wg = gemmdot(w1_ww, W_Wg, beta=0.0)
            Cminus_Wg = gemmdot(w1_ww.conj(), W_Wg, beta=0.0)

        for s in range(self.nspins):
            for i, k in enumerate(self.gwkpt_k):  # k is bzk index
                if df.optical_limit:
                    kq_c = df.kd.bzk_kc[k]
                else:
                    kq_c = df.kd.bzk_kc[k] - df.q_c  # k - q

                kq = df.kd.where_is_q(kq_c, df.kd.bzk_kc)
                assert df.kq_k[kq] == k
                ibzkpt1 = df.kd.bz2ibz_k[k]
                ibzkpt2 = df.kd.bz2ibz_k[kq]

                for j, n in enumerate(self.bands):
                    for m in range(self.m_start, self.m_end):

                        # +1 for states above the Fermi level, -1 otherwise.
                        if self.e_skn[s][ibzkpt2, m] > self.eFermi:
                            sign = 1.
                        else:
                            sign = -1.

                        rho_G = df.density_matrix(m, n, kq, spin1=s, spin2=s)

                        if not self.hilbert_trans:  # method 1
                            W_wGG[:, :, 0] = Wbackup_wG0
                            W_wGG[:, 0, :] = Wbackup_w0G

                            # w1 = w - epsilon_m,k-q
                            w1 = self.e_skn[s][ibzkpt1,
                                               n] - self.e_skn[s][ibzkpt2, m]

                            if self.ppa:
                                # analytical expression for Plasmon Pole Approximation
                                W_GG = sign * W_wGG[0] * (
                                    1. /
                                    (w1 + self.wt_GG - 1j * self.eta) - 1. /
                                    (w1 - self.wt_GG + 1j * self.eta))
                                W_GG -= W_wGG[0] * (
                                    1. /
                                    (w1 + self.wt_GG + 1j * self.eta * sign) +
                                    1. /
                                    (w1 - self.wt_GG + 1j * self.eta * sign))
                                W_G = gemmdot(W_GG, rho_G, beta=0.0)
                                Sigma_skn[s, i, j] += np.real(
                                    gemmdot(W_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))

                                # Same contraction with the squared
                                # denominators for the frequency derivative.
                                W_GG = sign * W_wGG[0] * (
                                    1. /
                                    (w1 - self.wt_GG + 1j * self.eta)**2 - 1. /
                                    (w1 + self.wt_GG - 1j * self.eta)**2)
                                W_GG += W_wGG[0] * (
                                    1. /
                                    (w1 - self.wt_GG + 1j * self.eta * sign)**2
                                    + 1. /
                                    (w1 + self.wt_GG + 1j * self.eta * sign)**2
                                )
                                W_G = gemmdot(W_GG, rho_G, beta=0.0)
                                dSigma_skn[s, i, j] += np.real(
                                    gemmdot(W_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))

                            elif self.static:
                                W1_GG = W_wGG[0] - np.eye(df.npw) * self.Kc_GG
                                W2_GG = W_wGG[0]

                                # perform W_GG * np.outer(rho_G.conj(), rho_G).sum(GG)
                                W_G = gemmdot(W1_GG, rho_G,
                                              beta=0.0)  # Coulomb Hole
                                Sigma_skn[s, i, j] += np.real(
                                    gemmdot(W_G,
                                            rho_G,
                                            alpha=self.alpha * pi / 1j,
                                            beta=0.0,
                                            trans='c'))
                                if sign == -1:
                                    W_G = gemmdot(
                                        W2_GG, rho_G,
                                        beta=0.0)  # Screened Exchange
                                    Sigma_skn[s, i, j] -= np.real(
                                        gemmdot(W_G,
                                                rho_G,
                                                alpha=2 * self.alpha * pi / 1j,
                                                beta=0.0,
                                                trans='c'))
                                del W1_GG, W2_GG, W_G, rho_G

                            else:
                                # perform W_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)
                                W_wG = gemmdot(W_wGG, rho_G, beta=0.0)
                                C_wlocal = gemmdot(W_wG,
                                                   rho_G,
                                                   alpha=self.alpha,
                                                   beta=0.0,
                                                   trans='c')
                                del W_wG, rho_G

                                # Collect the locally computed frequency
                                # points from all ranks of wcomm.
                                C_w = np.zeros(df.Nw, dtype=complex)
                                wcomm.all_gather(C_wlocal, C_w)
                                del C_wlocal

                                # calculate self energy
                                w1_w = 1. / (w1 - self.w_w + 1j * self.eta_w *
                                             sign) + 1. / (w1 + self.w_w + 1j *
                                                           self.eta_w * sign)
                                w1_w[0] -= 1. / (w1 + 1j * self.eta_w[0] * sign
                                                 )  # correct w'=0
                                w1_w *= self.dw_w
                                Sigma_skn[s, i, j] += np.real(
                                    gemmdot(C_w, w1_w, beta=0.0))

                                # calculate derivative of self energy with respect to w
                                w1_w = 1. / (w1 - self.w_w + 1j * self.eta_w *
                                             sign)**2 + 1. / (
                                                 w1 + self.w_w +
                                                 1j * self.eta_w * sign)**2
                                w1_w[0] -= 1. / (w1 + 1j * self.eta_w[0] *
                                                 sign)**2  # correct w'=0
                                w1_w *= self.dw_w
                                dSigma_skn[s, i, j] -= np.real(
                                    gemmdot(C_w, w1_w, beta=0.0))

                        else:  # method 2
                            # Unless the two eigenvalues coincide (within
                            # 1e-10), fold the sign of their difference
                            # into `sign`.
                            if not np.abs(self.e_skn[s][ibzkpt2, m] -
                                          self.e_skn[s][ibzkpt1, n]) < 1e-10:
                                sign *= np.sign(self.e_skn[s][ibzkpt1, n] -
                                                self.e_skn[s][ibzkpt2, m])

                            # find points on frequency grid
                            w0 = self.e_skn[s][ibzkpt1,
                                               n] - self.e_skn[s][ibzkpt2, m]
                            w0_id = np.abs(int(w0 / self.dw))
                            w1 = w0_id * self.dw
                            w2 = (w0_id + 1) * self.dw

                            # choose plus or minus, treat optical limit:
                            if sign == 1:
                                C_Wg = Cplus_Wg[
                                    w0_id:w0_id +
                                    2]  # only two grid points needed for each w0
                            if sign == -1:
                                C_Wg = Cminus_Wg[
                                    w0_id:w0_id +
                                    2]  # only two grid points needed for each w0

                            C_wGG = GatherOrbitals(C_Wg, coords, wcomm).copy()
                            del C_Wg

                            # special treat of w0 = 0 (degenerate states):
                            if w0_id == 0:
                                Cplustmp_GG = GatherOrbitals(
                                    Cplus_Wg[1], coords, wcomm).copy()
                                Cminustmp_GG = GatherOrbitals(
                                    Cminus_Wg[1], coords, wcomm).copy()

                            # perform C_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)

                            if w0_id == 0:
                                Sw0_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                                Sw0 = np.real(
                                    gemmdot(Sw0_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))
                                Sw1_G = gemmdot(Cplustmp_GG, rho_G, beta=0.0)
                                Sw1 = np.real(
                                    gemmdot(Sw1_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))
                                Sw2_G = gemmdot(Cminustmp_GG, rho_G, beta=0.0)
                                Sw2 = np.real(
                                    gemmdot(Sw2_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))

                                Sigma_skn[s, i, j] += Sw0
                                dSigma_skn[s, i,
                                           j] += (Sw1 + Sw2) / (2 * self.dw)

                            else:
                                Sw1_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                                Sw1 = np.real(
                                    gemmdot(Sw1_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))
                                Sw2_G = gemmdot(C_wGG[1], rho_G, beta=0.0)
                                Sw2 = np.real(
                                    gemmdot(Sw2_G,
                                            rho_G,
                                            alpha=self.alpha,
                                            beta=0.0,
                                            trans='c'))

                                # Linear interpolation between grid points
                                # w1 and w2; finite difference for the slope.
                                Sw0 = (w2 - np.abs(w0)) / self.dw * Sw1 + (
                                    np.abs(w0) - w1) / self.dw * Sw2
                                Sigma_skn[s, i, j] += np.sign(
                                    self.e_skn[s][ibzkpt1, n] -
                                    self.e_skn[s][ibzkpt2, m]) * Sw0
                                dSigma_skn[s, i, j] += (Sw2 - Sw1) / self.dw

        self.ncomm.barrier()
        self.ncomm.sum(Sigma_skn)
        self.ncomm.sum(dSigma_skn)

        return Sigma_skn, dSigma_skn

Пример #18

Показать файл

Файл: kernel.py Проект: robwarm/gpaw-symm

def calculate_Kxc(gd,
                  nt_sG,
                  npw,
                  Gvec_Gc,
                  nG,
                  vol,
                  bcell_cv,
                  R_av,
                  setups,
                  D_asp,
                  functional='ALDA',
                  density_cut=None):
    """ALDA kernel

    Builds the exchange-correlation kernel matrix as the sum of a grid
    ("soft") part and a per-atom PAW correction, and returns it divided
    by ``vol``.

    Parameters
    ----------
    gd :
        Grid object; ``get_grid_point_coordinates`` and ``integrate`` are
        used here.
    nt_sG :
        Density array with a leading spin axis.  Unless ``functional`` is
        ``'ALDA_X'`` it must hold exactly one spin component.
    npw :
        Number of plane waves; the result has shape ``(nspins, npw, npw)``.
    Gvec_Gc :
        Plane-wave index vectors, one row per plane wave.
    nG :
        Number of grid points along the three axes.
    vol :
        Volume used to normalise the FFT and the returned kernel.
    bcell_cv :
        Matrix converting index vectors into Cartesian vectors via
        ``np.dot(dG_c, bcell_cv)``.
    R_av :
        Per-atom vectors used in the phase factor ``exp(-i dG.R_a)``.
    setups :
        Per-atom setup objects providing ``xc_correction``.
    D_asp :
        Per-atom ``D_sp`` arrays, indexed by atom.
    functional :
        ``'ALDA_X'`` selects the analytic exchange-only expression; any
        other value is passed to ``XC`` with its first character removed.
    density_cut :
        If given, the kernel is set to zero wherever
        ``nt_sG * len(nt_sG)`` falls below this threshold.

    Returns
    -------
    Complex array of shape ``(nspins, npw, npw)``.

    Notes
    -----
    Relies on the names ``XC``, ``gemmdot``, ``rank``, ``size``, ``world``,
    ``weight_n`` and ``R_nv`` being available in the enclosing module.
    """

    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    # The difference vector lies inside the FFT box: read
                    # the coefficient straight from the transform.
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0],
                                                   index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(np.exp(-1j * dGr_g)
                                                      * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        # Atoms are distributed round-robin over the ranks.
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x
                                                 * (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)

                # The exponent prefactor depends only on the angular point
                # n, so it is evaluated once here instead of once per radial
                # grid point.  The multiplication order is unchanged, so the
                # results are bit-identical.
                phase_GG = -1j * np.inner(dG_GGv, R_nv[n])
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(phase_GG * rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] -
                                                     ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    # Combine the per-rank atomic contributions before adding the soft part.
    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol

Пример #19

Показать файл

    def initialize(self):
        """Derive all working quantities from ``self.calc``.

        Sets up, in order: unit conversion of ``eta``/``ecut``, k-point
        data, the cell, the real-space grid, eigenvalues and occupations,
        the k+q mapping and phase factor, the plane-wave basis, band counts
        and the projector functions, then prints a summary through
        ``self.print_stuff()``.

        ``self.eta``, ``self.ecut`` and ``self.ftol`` are rescaled in place,
        so calling this method twice rescales them twice.

        If the instance already has an ``e_skn`` attribute it is used as
        is; otherwise ``e_skn`` and ``f_skn`` are read from the calculator.
        """

        self.eta /= Hartree
        self.ecut /= Hartree

        calc = self.calc
        self.nspins = self.calc.wfs.nspins

        # kpoint init
        self.kd = kd = calc.wfs.kd
        self.nikpt = kd.nibzkpts
        self.ftol /= kd.nbzkpts

        # cell init
        self.acell_cv = calc.wfs.gd.cell_cv
        self.acell_cv, self.bcell_cv, self.vol, self.BZvol = \
            get_primitive_cell(self.acell_cv, rpad=self.rpad)

        # grid init
        gd = calc.wfs.gd.new_descriptor(comm=serial_comm)
        self.pbc = gd.pbc_c
        self.gd = gd
        self.nG0 = np.prod(gd.N_c)
        # Number of grid points and volume including zero padding
        self.nGrpad = gd.N_c * self.rpad
        self.nG0rpad = np.prod(self.nGrpad)
        self.d_c = [
            Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)
        ]

        # obtain eigenvalues, occupations
        nibzkpt = kd.nibzkpts
        kweight_k = kd.weight_k

        self.eFermi = self.calc.occupations.get_fermi_level()

        # Only a missing attribute means "no user eigenvalues".  Any other
        # error must propagate instead of silently replacing user data.
        try:
            self.e_skn
        except AttributeError:
            self.printtxt('Use eigenvalues from the calculator.')
            self.e_skn = {}
            self.f_skn = {}
            for ispin in range(self.nspins):
                self.e_skn[ispin] = np.array([
                    calc.get_eigenvalues(kpt=k, spin=ispin)
                    for k in range(nibzkpt)
                ]) / Hartree
                self.f_skn[ispin] = np.array([
                    calc.get_occupation_numbers(kpt=k, spin=ispin) /
                    kweight_k[k] for k in range(nibzkpt)
                ]) / kd.nbzkpts
        else:
            self.printtxt('Use eigenvalues from user.')

        # Keep an unshifted copy of the eigenvalues before any shift is
        # applied below.
        self.enoshift_skn = {}
        for ispin in range(self.nspins):
            self.enoshift_skn[ispin] = self.e_skn[ispin].copy()
        if self.eshift is not None:
            self.add_discontinuity(self.eshift)
            self.printtxt('Shift unoccupied bands by %f eV' % (self.eshift))
        # k + q init
        if self.q_c is not None:
            self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

            if self.optical_limit:
                # In the optical limit every k-point maps onto itself and
                # the phase factor is one.
                kq_k = np.arange(kd.nbzkpts)
                self.expqr_g = 1.
            else:
                r_vg = gd.get_grid_point_coordinates()  # (3, nG)
                qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
                self.expqr_g = np.exp(-1j * qr_g)
                del r_vg, qr_g
                kq_k = kd.find_k_plus_q(self.q_c)
            self.kq_k = kq_k

        # Plane wave init
        if self.G_plus_q:
            self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(self.acell_cv,
                                                                 self.bcell_cv,
                                                                 self.gd.N_c,
                                                                 self.ecut,
                                                                 q=self.q_c)
        else:
            self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
                self.acell_cv, self.bcell_cv, self.gd.N_c, self.ecut)

        # band init
        if self.nbands is None:
            self.nbands = calc.wfs.bd.nbands
        self.nvalence = calc.wfs.nvalence

        # Projectors init
        setups = calc.wfs.setups
        self.spos_ac = calc.atoms.get_scaled_positions()

        if self.pwmode:
            self.pt = PWLFC([setup.pt_j for setup in setups], self.calc.wfs.pd)
            self.pt.set_positions(self.spos_ac)
        else:
            self.pt = LFC(gd, [setup.pt_j for setup in setups],
                          KPointDescriptor(self.kd.bzk_kc),
                          dtype=complex,
                          forces=True)

            self.pt.set_positions(self.spos_ac)

        # Printing calculation information
        self.print_stuff()

        return

Пример #20

Показать файл

import numpy as np
from gpaw.utilities.blas import \
    gemm, axpy, r2k, rk, gemmdot, dotc
from gpaw.utilities.tools import tri2full

# Real-valued operands: a (5, 7) matrix, a (3, 7) matrix and a 7-vector.
a = 4. + np.arange(5 * 7).reshape(5, 7)
a2 = 3. + np.arange(3 * 7).reshape(3, 7)
b = np.arange(7) - 2.

# Check gemmdot with floats: it has to reproduce np.dot exactly, and for
# real input the 't' and 'c' modes must give the same product.
assert np.all(gemmdot(a, b) == np.dot(a, b))
for mode in ('t', 'c'):
    assert np.all(gemmdot(a, a2, trans=mode) == np.dot(a, a2.T))
assert gemmdot(b, b) == np.dot(b, b)

# Check gemmdot with complex arrays: 'c' now additionally conjugates the
# second operand, 't' does not.
a = a * (2 + 1.j)
a2 = a2 * (-1 + 3.j)
b = b * (3 - 2.j)
assert np.all(gemmdot(a, b) == np.dot(a, b))
assert np.all(gemmdot(a, a2, trans='t') == np.dot(a, a2.T))
assert np.all(gemmdot(a, a2, trans='c') == np.dot(a, a2.T.conj()))
assert gemmdot(b, b, trans='n') == np.dot(b, b)
assert gemmdot(b, b, trans='c') == np.dot(b, b.conj())
assert dotc(a, 5.j * a) == np.vdot(a, 5.j * a)

# Check gemm for transa='n': starting from c = a . a2, the call
# gemm(1, a2, a, -1, c) must leave c identically zero.
a2 = np.arange(7 * 5 * 1 * 3).reshape(7, 5, 1, 3) * (-1. + 4.j) + 3.
c = np.tensordot(a, a2, [1, 0])
gemm(1., a2, a, -1., c, 'n')
assert not c.any()

Пример #21

Показать файл

Файл: base.py Проект: qsnake/gpaw

    def initialize(self):
        """Derive all working quantities from ``self.calc``.

        Sets up, in order: unit conversion of ``eta``/``ecut``, k-point
        data, band counts, the cell, the real-space grid, eigenvalues and
        occupations, the k+q mapping and phase factor, the plane-wave basis
        and the projector functions, then prints a summary through
        ``self.print_stuff()``.

        ``self.eta``, ``self.ecut`` and ``self.ftol`` are rescaled in place,
        so calling this method twice rescales them twice.

        If the instance already has an ``e_kn`` attribute it is used as is;
        otherwise the eigenvalues are read from the calculator.  The
        occupations ``f_kn`` are always read from the calculator.
        ``self.q_c`` must be set before calling.
        """

        self.eta /= Hartree
        self.ecut /= Hartree

        calc = self.calc

        # kpoint init
        self.kd = kd = calc.wfs.kd
        self.bzk_kc = kd.bzk_kc
        self.ibzk_kc = kd.ibzk_kc
        self.nkpt = kd.nbzkpts
        self.ftol /= self.nkpt

        # band init
        if self.nbands is None:
            self.nbands = calc.wfs.nbands
        self.nvalence = calc.wfs.nvalence

        # cell init
        self.acell_cv = calc.atoms.cell / Bohr
        self.bcell_cv, self.vol, self.BZvol = get_primitive_cell(self.acell_cv)

        # grid init
        self.nG = calc.get_number_of_grid_points()
        self.nG0 = self.nG[0] * self.nG[1] * self.nG[2]
        gd = GridDescriptor(self.nG, calc.wfs.gd.cell_cv, pbc_c=True, comm=serial_comm)
        self.gd = gd
        self.h_cv = gd.h_cv

        # obtain eigenvalues, occupations
        nibzkpt = kd.nibzkpts
        kweight_k = kd.weight_k

        # Only a missing attribute means "no user eigenvalues"; any other
        # error must propagate rather than be swallowed.
        try:
            self.e_kn
        except AttributeError:
            self.printtxt('Use eigenvalues from the calculator.')
            self.e_kn = np.array([calc.get_eigenvalues(kpt=k)
                    for k in range(nibzkpt)]) / Hartree
            self.printtxt('Eigenvalues(k=0) are:')
            print  >> self.txt, self.e_kn[0] * Hartree
        self.f_kn = np.array([calc.get_occupation_numbers(kpt=k) / kweight_k[k]
                    for k in range(nibzkpt)]) / self.nkpt


        # k + q init
        assert self.q_c is not None
        self.qq_v = np.dot(self.q_c, self.bcell_cv) # summation over c

        if self.optical_limit:
            # In the optical limit every k-point maps onto itself and the
            # phase factor is one.
            kq_k = np.arange(self.nkpt)
            self.expqr_g = 1.
        else:
            r_vg = gd.get_grid_point_coordinates() # (3, nG)
            qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
            self.expqr_g = np.exp(-1j * qr_g)
            del r_vg, qr_g
            kq_k = kd.find_k_plus_q(self.q_c)
        self.kq_k = kq_k

        # Plane wave init
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(self.acell_cv, self.bcell_cv, self.nG, self.ecut)

        # Projectors init
        setups = calc.wfs.setups
        pt = LFC(gd, [setup.pt_j for setup in setups],
                 dtype=calc.wfs.dtype, forces=True)
        spos_ac = calc.atoms.get_scaled_positions()
        pt.set_k_points(self.bzk_kc)
        pt.set_positions(spos_ac)
        self.pt = pt

        # Printing calculation information
        self.print_stuff()

        return

Пример #22

Показать файл

Файл: lcao.py Проект: eojons/gpaw-scme

    def calculate_forces(self, hamiltonian, F_av):
        self.timer.start('LCAO forces')

        spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
        ksl = self.ksl
        nao = ksl.nao
        mynao = ksl.mynao
        nq = len(self.kd.ibzk_qc)
        dtype = self.dtype
        tci = self.tci
        gd = self.gd
        bfs = self.basis_functions
        
        Mstart = ksl.Mstart
        Mstop = ksl.Mstop

        from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
        isblacs = isinstance(ksl, BlacsOrbitalLayouts) # XXX
        
        if not isblacs:
            self.timer.start('TCI derivative')
            dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dPdR_aqvMi = {}
            for a in self.basis_functions.my_atom_indices:
                ni = self.setups[a].ni
                dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
            tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                     dPdR_aqvMi)
            gd.comm.sum(dThetadR_qvMM)
            gd.comm.sum(dTdR_qvMM)
            self.timer.stop('TCI derivative')
        
            my_atom_indices = bfs.my_atom_indices
            atom_indices = bfs.atom_indices

            def _slices(indices):
                for a in indices:
                    M1 = bfs.M_a[a] - Mstart
                    M2 = M1 + self.setups[a].nao
                    if M2 > 0:
                        yield a, max(0, M1), M2

            def slices():
                return _slices(atom_indices)

            def my_slices():
                return _slices(my_atom_indices)
        
        #
        #         -----                    -----
        #          \    -1                  \    *
        # E      =  )  S     H    rho     =  )  c     eps  f  c
        #  mu nu   /    mu x  x z    z nu   /    n mu    n  n  n nu
        #         -----                    -----
        #          x z                       n
        #
        # We use the transpose of that matrix.  The first form is used
        # if rho is given, otherwise the coefficients are used.
        self.timer.start('Initial')


        rhoT_uMM = []
        ET_uMM = []

        if not isblacs:
            if self.kpt_u[0].rho_MM is None:
                self.timer.start('Get density matrix')
                for kpt in self.kpt_u:
                    rhoT_MM = ksl.get_transposed_density_matrix(kpt.f_n,
                                                                kpt.C_nM)
                    rhoT_uMM.append(rhoT_MM)
                    ET_MM = ksl.get_transposed_density_matrix(kpt.f_n
                                                              * kpt.eps_n,
                                                              kpt.C_nM)
                    ET_uMM.append(ET_MM)

                    if hasattr(kpt, 'c_on'):
                        # XXX does this work with BLACS/non-BLACS/etc.?
                        assert self.bd.comm.size == 1
                        d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), dtype=kpt.C_nM.dtype)
                        for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                                d_nn += ne * np.outer(c_n.conj(), c_n)
                        rhoT_MM += ksl.get_transposed_density_matrix_delta(d_nn, kpt.C_nM)
                        ET_MM += ksl.get_transposed_density_matrix_delta(d_nn * kpt.eps_n, kpt.C_nM)
                self.timer.stop('Get density matrix')
            else:
                rhoT_uMM = []
                ET_uMM = []
                for kpt in self.kpt_u:
                    H_MM = self.eigensolver.calculate_hamiltonian_matrix(hamiltonian, self, kpt)
                    tri2full(H_MM)
                    S_MM = kpt.S_MM.copy()
                    tri2full(S_MM)
                    ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM,
                                                          kpt.rho_MM)).T.copy()
                    del S_MM, H_MM
                    rhoT_MM = kpt.rho_MM.T.copy()
                    rhoT_uMM.append(rhoT_MM)
                    ET_uMM.append(ET_MM)
        self.timer.stop('Initial')

        if isblacs: # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
            from gpaw.blacs import BlacsGrid, Redistributor
            
            def get_density_matrix(f_n, C_nM, redistributor):
                rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                               C_nM).conj()
                rho_mm = redistributor.redistribute(rho1_mm)
                return rho_mm
            
            pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j])
                         for setup in self.setups]
            phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j])
                           for setup in self.setups]
            
            # XXX should probably use bdsize x gdsize instead
            # That would be consistent with some existing grids
            grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                             self.bd.comm.size)
            
            blocksize1 = -(-nao // grid.nprow)
            blocksize2 = -(-nao // grid.npcol)
            # XXX what are rows and columns actually?
            desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)
            
            rhoT_umm = []
            ET_umm = []
            redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                self.timer.start('Get density matrix')
                rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
                rhoT_umm.append(rhoT_mm)
                self.timer.stop('Get density matrix')
                
                self.timer.start('Potential')
                rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)
                
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_mM,
                                                            kpt.q)
                del rhoT_mM
                self.timer.stop('Potential')
            
            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                           redistributor)
                ET_umm.append(ET_mm)
            self.timer.stop('Get density matrix')
            
            M1start = blocksize1 * grid.myrow
            M2start = blocksize2 * grid.mycol
            
            M1stop = min(M1start + blocksize1, nao)
            M2stop = min(M2start + blocksize2, nao)
            
            m1max = M1stop - M1start
            m2max = M2stop - M2start


        
        if not isblacs:
            # Kinetic energy contribution
            #
            #           ----- d T
            #  a         \       mu nu
            # F += 2 Re   )   -------- rho
            #            /    d R         nu mu
            #           -----    mu nu
            #        mu in a; nu
            #
            Fkin_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dEdTrhoT_vMM = (dTdR_qvMM[kpt.q]
                                * rhoT_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Fkin_av[a, :] += 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM


            # Density matrix contribution due to basis overlap
            #
            #            ----- d Theta
            #  a          \           mu nu
            # F  += -2 Re  )   ------------  E
            #             /        d R        nu mu
            #            -----        mu nu
            #         mu in a; nu
            #
            Ftheta_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dThetadRE_vMM = (dThetadR_qvMM[kpt.q]
                                 * ET_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Ftheta_av[a, :] += -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

        if isblacs:
            from gpaw.lcao.overlap import TwoCenterIntegralCalculator
            self.timer.start('Prepare TCI loop')
            M_a = bfs.M_a
            
            Fkin2_av = np.zeros_like(F_av)
            Ftheta2_av = np.zeros_like(F_av)

            cell_cv = tci.atoms.cell
            spos_ac = tci.atoms.get_scaled_positions() % 1.0

            overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                      derivative=False)

            def get_phases(offset):
                """Return a phase object for *offset*.

                The object is created by ``overlapcalc.phaseclass`` from
                ``overlapcalc.ibzk_qc`` and the given offset.
                """
                return overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)

            # XXX this is not parallel *AT ALL*.
            self.timer.start('Get neighbors')
            nl = tci.atompairs.pairs.neighbors
            r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
            atompairs = r_and_offset_aao.keys()
            atompairs.sort()
            self.timer.stop('Get neighbors')

            T_expansions = tci.T_expansions
            Theta_expansions = tci.Theta_expansions
            P_expansions = tci.P_expansions
            nq = len(self.ibzk_qc)
            
            dH_asp = hamiltonian.dH_asp

            self.timer.start('broadcast dH')
            alldH_asp = {}
            for a in range(len(self.setups)):
                gdrank = bfs.sphere_a[a].rank
                if gdrank == gd.rank:
                    dH_sp = dH_asp[a]
                else:
                    ni = self.setups[a].ni
                    dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                gd.comm.broadcast(dH_sp, gdrank)
                # okay, now everyone gets copies of dH_sp
                alldH_asp[a] = dH_sp
            self.timer.stop('broadcast dH')
            

            # This will get sort of hairy.  We need to account for some
            # three-center overlaps, such as:
            #
            #         a1
            #      Phi   ~a3    a3  ~a3     a2     a2,a1
            #   < ----  |p  > dH   <p   |Phi  > rho
            #      dR
            #
            # To this end we will loop over all pairs of atoms (a1, a3),
            # and then a sub-loop over (a3, a2).
            from gpaw.lcao.overlap import DerivativeAtomicDisplacement
            class Displacement(DerivativeAtomicDisplacement):
                """DerivativeAtomicDisplacement that creates its own phases.

                The phase object is built from ``overlapcalc`` for the given
                offset, and ``None`` is passed as the first argument of the
                base-class constructor.
                """
                def __init__(self, a1, a2, R_c, offset):
                    # Phase object for this offset, built the same way as in
                    # overlapcalc (phaseclass applied to its ibzk_qc).
                    phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                    offset)
                    DerivativeAtomicDisplacement.__init__(self, None, a1, a2,
                                                          R_c, offset, phases)

            # Cache of Displacement objects, keyed by atom pair (a1, a2),
            # so that their evaluated spherical harmonics can be reused.
            disp_aao = {}

            def get_displacements(a1, a2, maxdistance):
                """Return the displacements of pair (a1, a2) within range.

                On the first call for a pair, one ``Displacement`` is created
                for every entry of ``r_and_offset_aao[(a1, a2)]`` whose norm
                does not exceed *maxdistance*, and the list is stored in
                ``disp_aao``.  Every call returns the cached displacements
                with ``disp.r < maxdistance``.
                """
                # XXX the way maxdistance is handled it can lead to
                # bad caching when different maxdistances are passed
                # to subsequent calls with same pair of atoms
                pair = (a1, a2)
                cached_o = disp_aao.get(pair)
                if cached_o is None:
                    cached_o = [Displacement(a1, a2, r, offset)
                                for r, offset in r_and_offset_aao[pair]
                                if not np.linalg.norm(r) > maxdistance]
                    disp_aao[pair] = cached_o
                return [disp for disp in cached_o if disp.r < maxdistance]
                
            self.timer.stop('Prepare TCI loop')
            self.timer.start('Not so complicated loop')


            for (a1, a2) in atompairs:
                if a1 >= a2:
                    # Actually this leads to bad load balance.
                    # We should take a1 > a2 or a1 < a2 equally many times.
                    # Maybe decide which of these choices
                    # depending on whether a2 % 1 == 0
                    continue
                
                m1start = M_a[a1] - M1start
                m2start = M_a[a2] - M2start
                if m1start >= blocksize1 or m2start >= blocksize2:
                    continue

                T_expansion = T_expansions.get(a1, a2)
                Theta_expansion = Theta_expansions.get(a1, a2)
                P_expansion = P_expansions.get(a1, a2)
                nm1, nm2 = T_expansion.shape

                m1stop = min(m1start + nm1, m1max)
                m2stop = min(m2start + nm2, m2max)

                if m1stop <= 0 or m2stop <= 0:
                    continue

                m1start = max(m1start, 0)
                m2start = max(m2start, 0)
                J1start = max(0, M1start - M_a[a1])
                J2start = max(0, M2start - M_a[a2])
                M1stop = J1start + m1stop - m1start
                J2stop = J2start + m2stop - m2start

                dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
                dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

                disp_o = get_displacements(a1, a2,
                                           phicutoff_a[a1] + phicutoff_a[a2])
                for disp in disp_o:
                    disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                    disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    Fkin_v = 2.0 * (dTdR_qvmm[kpt.q][:, J1start:M1stop,
                                                     J2start:J2stop]
                                    * rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop,
                                                           J2start:J2stop]
                                      * ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Fkin2_av[a1] += Fkin_v
                    Fkin2_av[a2] -= Fkin_v
                    Ftheta2_av[a1] -= Ftheta_v
                    Ftheta2_av[a2] += Ftheta_v

            Fkin_av = Fkin2_av
            Ftheta_av = Ftheta2_av
            self.timer.stop('Not so complicated loop')

            dHP_and_dSP_aauim = {}

            a2values = {}
            for (a2, a3) in atompairs:
                if not a3 in a2values:
                    a2values[a3] = []
                a2values[a3].append(a2)
            
            Fatom_av = np.zeros_like(F_av)
            Frho_av = np.zeros_like(F_av)
            self.timer.start('Complicated loop')
            for a1, a3 in atompairs:
                if a1 == a3:
                    continue
                
                m1start = M_a[a1] - M1start
                if m1start >= blocksize1:
                    continue
                
                P_expansion = P_expansions.get(a1, a3)
                nm1 = P_expansion.shape[0]
                m1stop = min(m1start + nm1, m1max)
                if m1stop <= 0:
                    continue

                m1start = max(m1start, 0)
                J1start = max(0, M1start - M_a[a1])
                J1stop = J1start + m1stop - m1start

                disp_o = get_displacements(a1, a3,
                                           phicutoff_a[a1] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue
                
                dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
                for disp in disp_o:
                    disp.evaluate_overlap(P_expansion, dPdR_qvmi)

                dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
                for a2 in a2values[a3]:
                    m2start = M_a[a2] - M2start
                    if m2start >= blocksize2:
                        continue

                    P_expansion2 = P_expansions.get(a2, a3)
                    nm2 = P_expansion2.shape[0]
                    m2stop = min(m2start + nm2, m2max)
                    if m2stop <= 0:
                        continue
                    
                    disp_o = get_displacements(a2, a3,
                                               phicutoff_a[a2] + pcutoff_a[a3])
                    if len(disp_o) == 0:
                        continue

                    m2start = max(m2start, 0)
                    J2start = max(0, M2start - M_a[a2])
                    J2stop = J2start + m2stop - m2start

                    if (a2, a3) in dHP_and_dSP_aauim:
                        dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                    else:
                        P_qmi = P_expansion2.zeros((nq,), dtype=dtype)
                        for disp in disp_o:
                            disp.evaluate_direct(P_expansion2, P_qmi)
                        P_qmi = P_qmi[:, J2start:J2stop].copy()
                        dH_sp = alldH_asp[a3]
                        dS_ii = self.setups[a3].dO_ii
                        
                        dHP_uim = []
                        dSP_uim = []
                        for u, kpt in enumerate(self.kpt_u):
                            dH_ii = unpack(dH_sp[kpt.s])
                            dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                            # XXX only need nq of these
                            dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                            dHP_uim.append(dHP_im)
                            dSP_uim.append(dSP_im)
                            dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim
                    
                    for u, kpt in enumerate(self.kpt_u):
                        rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                        ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                        dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                        dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])
                        
                        Fatom_c = 2.0 * (dPdRdHP_vmm
                                         * rhoT_mm).real.sum(-1).sum(-1)
                        Frho_c = 2.0 * (dPdRdSP_vmm
                                        * ET_mm).real.sum(-1).sum(-1)
                        Fatom_av[a1] += Fatom_c
                        Fatom_av[a3] -= Fatom_c

                        Frho_av[a1] -= Frho_c
                        Frho_av[a3] += Frho_c
                        
            self.timer.stop('Complicated loop')
        
        if not isblacs:
            # Potential contribution
            #
            #           -----      /  d Phi  (r)
            #  a         \        |        mu    ~
            # F += -2 Re  )       |   ---------- v (r)  Phi  (r) dr rho
            #            /        |     d R                nu          nu mu
            #           -----    /         a
            #        mu in a; nu
            #
            self.timer.start('Potential')
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_uMM[u],
                                                            kpt.q)
            self.timer.stop('Potential')

            # Density matrix contribution from PAW correction
            #
            #           -----                        -----
            #  a         \      a                     \     b
            # F +=  2 Re  )    Z      E        - 2 Re  )   Z      E
            #            /      mu nu  nu mu          /     mu nu  nu mu
            #           -----                        -----
            #           mu nu                    b; mu in a; nu
            #
            # with
            #                  b*
            #         -----  dP
            #   b      \       i mu    b   b
            #  Z     =  )   -------- dS   P
            #   mu nu  /     dR        ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Paw correction')
            Frho_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                work_MM = np.zeros((mynao, nao), dtype)
                ZE_MM = None
                for b in my_atom_indices:
                    setup = self.setups[b]
                    dO_ii = np.asarray(setup.dO_ii, dtype)
                    dOP_iM = np.zeros((setup.ni, nao), dtype)
                    gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                    for v in range(3):
                        gemm(1.0, dOP_iM, dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop],
                             0.0, work_MM, 'n')
                        ZE_MM = (work_MM * ET_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ZE_MM[M1:M2].sum()
                            Frho_av[a, v] -= dE # the "b; mu in a; nu" term
                            Frho_av[b, v] += dE # the "mu nu" term
            del work_MM, ZE_MM
            self.timer.stop('Paw correction')

            # Atomic density contribution
            #            -----                         -----
            #  a          \     a                       \     b
            # F  += -2 Re  )   A      rho       + 2 Re   )   A      rho
            #             /     mu nu    nu mu          /     mu nu    nu mu
            #            -----                         -----
            #            mu nu                     b; mu in a; nu
            #
            #                  b*
            #         ----- d P
            #  b       \       i mu   b   b
            # A     =   )   ------- dH   P
            #  mu nu   /    d R       ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Atomic Hamiltonian force')
            Fatom_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                for b in my_atom_indices:
                    H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]), dtype)
                    HP_iM = gemmdot(H_ii,
                                    np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                    for v in range(3):
                        dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                        ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ArhoT_MM[M1:M2].sum()
                            Fatom_av[a, v] += dE # the "b; mu in a; nu" term
                            Fatom_av[b, v] -= dE # the "mu nu" term
            self.timer.stop('Atomic Hamiltonian force')

        F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
        self.timer.start('Wait for sum')
        ksl.orbital_comm.sum(F_av)
        if self.bd.comm.rank == 0:
            self.kpt_comm.sum(F_av, 0)
        self.timer.stop('Wait for sum')
        self.timer.stop('LCAO forces')

Пример #23

Показать файл

    def density_matrix(self,
                       n,
                       m,
                       k,
                       kq=None,
                       spin1=0,
                       spin2=0,
                       phi_aGp=None,
                       Gspace=True):
        """Return the pair density between state (n, k) and state (m, kq).

        The product ``psit1_g.conj() * psit2_g * expqr_g`` is formed from the
        two transformed wave functions.  With ``Gspace=False`` this product
        is returned directly.  Otherwise it is Fourier transformed, reduced
        to the entries selected by ``self.Gindex_G``, and a per-atom term
        built from ``phi_aGp`` and the projections of both states is added.

        Parameters:

        n, m: int
            Band indices of the first (at ``k``) and second (at ``kq``)
            state.
        k: int
            Index of the first k-point (used with ``kd.bz2ibz_k``).
        kq: int or None
            Index of the second k-point.  If None, ``self.kq_k[k]`` is used
            together with the stored ``self.expqr_g``, ``self.qq_v``,
            ``self.q_c`` and ``self.optical_limit``.
        spin1, spin2: int
            Spin indices of the first and second state.
        phi_aGp: per-atom arrays or None
            Indexed by atom number.  If None, it is loaded with
            ``self.load_phi_aGp`` or, when that raises AttributeError, taken
            from ``self.phi_aGp``.
        Gspace: bool
            If exactly ``False``, return the real-space product instead of
            the plane-wave coefficients.

        Returns the real-space product (``Gspace=False``) or ``rho_G``.
        """
        gd = self.gd
        kd = self.kd
        optical_limit = False

        if kq is None:
            kq = self.kq_k[k]
            expqr_g = self.expqr_g
            q_v = self.qq_v
            optical_limit = self.optical_limit
            q_c = self.q_c
        else:
            q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
            # Shift the components by +/-1 where they exceed the thresholds.
            q_c[q_c > 0.501] -= 1
            q_c[q_c < -0.499] += 1

            if (np.abs(q_c) < self.ftol).all():
                optical_limit = True
                q_c = self.q_c
            q_v = np.dot(q_c, self.bcell_cv)
            if optical_limit:
                # The phase factor is replaced by unity in the optical
                # limit, so do not evaluate exp(-i q.r) on the grid at all.
                expqr_g = 1
            else:
                r_vg = gd.get_grid_point_coordinates()  # (3, nG)
                qr_g = gemmdot(q_v, r_vg, beta=0.0)
                expqr_g = np.exp(-1j * qr_g)

        ibzkpt1 = kd.bz2ibz_k[k]
        ibzkpt2 = kd.bz2ibz_k[kq]

        psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin1)
        psit1_g = kd.transform_wave_function(psitold_g, k)

        psitold_g = self.get_wavefunction(ibzkpt2, m, True, spin=spin2)
        psit2_g = kd.transform_wave_function(psitold_g, kq)

        tmp_g = psit1_g.conj() * psit2_g * expqr_g
        if Gspace is False:
            return tmp_g

        # zero padding is included through the FFT
        rho_g = np.fft.fftn(tmp_g, s=self.nGrpad) * self.vol / self.nG0rpad

        # Here, planewave cutoff is applied
        rho_G = rho_g.ravel()[self.Gindex_G]

        if optical_limit:
            # Replace the first entry by -i q . <psi1| d/dx |psi2>.
            dpsit_g = gd.empty(dtype=complex)
            tmp = np.zeros(3, dtype=complex)
            phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                              kd.bzk_kc[kq, :, np.newaxis])
            for ix in range(3):
                self.d_c[ix](psit2_g, dpsit_g, phase_cd)
                tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
            rho_G[0] = -1j * np.dot(q_v, tmp)

        calc = self.calc
        pt = self.pt
        if not self.pwmode:
            if calc.wfs.world.size > 1 or kd.nbzkpts == 1:
                P1_ai = pt.dict()
                pt.integrate(psit1_g, P1_ai, k)
                P2_ai = pt.dict()
                pt.integrate(psit2_g, P2_ai, kq)
            else:
                P1_ai = self.get_P_ai(k, n, spin1)
                P2_ai = self.get_P_ai(kq, m, spin2)
        else:
            # first calculate P_ai at ibzkpt, then rotate to k
            u = self.kd.get_rank_and_index(spin1, ibzkpt1)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[n], Ptmp_ai, ibzkpt1)
            P1_ai = self.get_P_ai(k, n, spin1, Ptmp_ai)

            u = self.kd.get_rank_and_index(spin2, ibzkpt2)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[m], Ptmp_ai, ibzkpt2)
            P2_ai = self.get_P_ai(kq, m, spin2, Ptmp_ai)

        if phi_aGp is None:
            try:
                # NOTE(review): if self.mode == 'RPA', iq is never assigned
                # in this function, so the load below would raise
                # UnboundLocalError unless an AttributeError is raised
                # first -- confirm that such callers always pass phi_aGp.
                if not self.mode == 'RPA':
                    if optical_limit:
                        iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                    else:
                        iq = kd.where_is_q(q_c, self.bzq_qc)
                        assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8

                phi_aGp = self.load_phi_aGp(self.reader, iq)  # phi_qaGp[iq]
            except AttributeError:
                phi_aGp = self.phi_aGp

        # Per-atom term: outer product of the projections of both states,
        # combined with phi_aGp[a] via gemv (presumably accumulated into
        # rho_G in place -- see the gemv documentation).
        for a, _ in enumerate(calc.wfs.setups.id_a):
            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
            phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
            gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

        if optical_limit:
            if n == m:
                rho_G[0] = 1.
            elif np.abs(self.e_skn[spin2][ibzkpt2, m] -
                        self.e_skn[spin1][ibzkpt1, n]) < 1e-5:
                # Eigenvalues closer than 1e-5: set the first entry to zero
                # instead of dividing by the energy difference below.
                rho_G[0] = 0.
            else:
                rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                             self.enoshift_skn[spin1][ibzkpt1, n])

        return rho_G

Пример #24

Показать файл

Файл: base.py Проект: ryancoleman/lotsofcoresbook2code

    def density_matrix(self, n, m, k, kq=None,
                       spin1=0, spin2=0, phi_aGp=None, Gspace=True):
        """Return the pair density between state (n, k) and state (m, kq).

        The product ``psit1_g.conj() * psit2_g * expqr_g`` is formed from the
        two transformed wave functions.  With ``Gspace=False`` this product
        is returned directly.  Otherwise it is Fourier transformed, reduced
        to the entries selected by ``self.Gindex_G``, and a per-atom term
        built from ``phi_aGp`` and the projections of both states is added.

        Parameters:

        n, m: int
            Band indices of the first (at ``k``) and second (at ``kq``)
            state.
        k: int
            Index of the first k-point (used with ``kd.bz2ibz_k``).
        kq: int or None
            Index of the second k-point.  If None, ``self.kq_k[k]`` is used
            together with the stored ``self.expqr_g``, ``self.qq_v``,
            ``self.q_c`` and ``self.optical_limit``.
        spin1, spin2: int
            Spin indices of the first and second state.
        phi_aGp: per-atom arrays or None
            Indexed by atom number.  If None, it is loaded with
            ``self.load_phi_aGp`` or, when that raises AttributeError, taken
            from ``self.phi_aGp``.
        Gspace: bool
            If exactly ``False``, return the real-space product instead of
            the plane-wave coefficients.

        Returns the real-space product (``Gspace=False``) or ``rho_G``.
        """
        gd = self.gd
        kd = self.kd
        optical_limit = False

        if kq is None:
            kq = self.kq_k[k]
            expqr_g = self.expqr_g
            q_v = self.qq_v
            optical_limit = self.optical_limit
            q_c = self.q_c
        else:
            q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
            # Shift the components by +/-1 where they exceed the thresholds.
            q_c[q_c > 0.501] -= 1
            q_c[q_c < -0.499] += 1

            if (np.abs(q_c) < self.ftol).all():
                optical_limit = True
                q_c = self.q_c
            q_v = np.dot(q_c, self.bcell_cv)
            if optical_limit:
                # The phase factor is replaced by unity in the optical
                # limit, so do not evaluate exp(-i q.r) on the grid at all.
                expqr_g = 1
            else:
                r_vg = gd.get_grid_point_coordinates()  # (3, nG)
                qr_g = gemmdot(q_v, r_vg, beta=0.0)
                expqr_g = np.exp(-1j * qr_g)

        ibzkpt1 = kd.bz2ibz_k[k]
        ibzkpt2 = kd.bz2ibz_k[kq]

        psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin1)
        psit1_g = kd.transform_wave_function(psitold_g, k)

        psitold_g = self.get_wavefunction(ibzkpt2, m, True, spin=spin2)
        psit2_g = kd.transform_wave_function(psitold_g, kq)

        tmp_g = psit1_g.conj() * psit2_g * expqr_g
        if Gspace is False:
            return tmp_g

        # zero padding is included through the FFT
        rho_g = np.fft.fftn(tmp_g, s=self.nGrpad) * self.vol / self.nG0rpad

        # Here, planewave cutoff is applied
        rho_G = rho_g.ravel()[self.Gindex_G]

        if optical_limit:
            # Replace the first entry by -i q . <psi1| d/dx |psi2>.
            dpsit_g = gd.empty(dtype=complex)
            tmp = np.zeros(3, dtype=complex)
            phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                              kd.bzk_kc[kq, :, np.newaxis])
            for ix in range(3):
                self.d_c[ix](psit2_g, dpsit_g, phase_cd)
                tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
            rho_G[0] = -1j * np.dot(q_v, tmp)

        calc = self.calc
        pt = self.pt
        if not self.pwmode:
            if calc.wfs.world.size > 1 or kd.nbzkpts == 1:
                P1_ai = pt.dict()
                pt.integrate(psit1_g, P1_ai, k)
                P2_ai = pt.dict()
                pt.integrate(psit2_g, P2_ai, kq)
            else:
                P1_ai = self.get_P_ai(k, n, spin1)
                P2_ai = self.get_P_ai(kq, m, spin2)
        else:
            # first calculate P_ai at ibzkpt, then rotate to k
            u = self.kd.get_rank_and_index(spin1, ibzkpt1)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[n], Ptmp_ai, ibzkpt1)
            P1_ai = self.get_P_ai(k, n, spin1, Ptmp_ai)

            u = self.kd.get_rank_and_index(spin2, ibzkpt2)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[m], Ptmp_ai, ibzkpt2)
            P2_ai = self.get_P_ai(kq, m, spin2, Ptmp_ai)

        if phi_aGp is None:
            try:
                # NOTE(review): if self.mode == 'RPA', iq is never assigned
                # in this function, so the load below would raise
                # UnboundLocalError unless an AttributeError is raised
                # first -- confirm that such callers always pass phi_aGp.
                if not self.mode == 'RPA':
                    if optical_limit:
                        iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                    else:
                        iq = kd.where_is_q(q_c, self.bzq_qc)
                        assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8

                phi_aGp = self.load_phi_aGp(self.reader, iq)  # phi_qaGp[iq]
            except AttributeError:
                phi_aGp = self.phi_aGp

        # Per-atom term: outer product of the projections of both states,
        # combined with phi_aGp[a] via gemv (presumably accumulated into
        # rho_G in place -- see the gemv documentation).
        for a, _ in enumerate(calc.wfs.setups.id_a):
            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
            phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
            gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

        if optical_limit:
            if n == m:
                rho_G[0] = 1.
            elif np.abs(self.e_skn[spin2][ibzkpt2, m] -
                        self.e_skn[spin1][ibzkpt1, n]) < 1e-5:
                # Eigenvalues closer than 1e-5: set the first entry to zero
                # instead of dividing by the energy difference below.
                rho_G[0] = 0.
            else:
                rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                             self.enoshift_skn[spin1][ibzkpt1, n])

        return rho_G

Пример #25

Показать файл

Файл: blas.py Проект: qsnake/gpaw

import numpy as np
from gpaw.utilities.blas import \
     gemm, axpy, r2k, rk, gemmdot, rotate, dotc, dotu
from gpaw.utilities.tools import tri2full

# Real-valued operands: a 5x7 matrix, a 3x7 matrix and a length-7 vector.
a = np.arange(35).reshape(5, 7) + 4.
a2 = np.arange(21).reshape(3, 7) + 3.
b = np.arange(7) - 2.

# Check gemmdot with floats: for real input both 't' and 'c' must
# reproduce the plain transposed product.
assert np.all(np.dot(a, b) == gemmdot(a, b))
for trans in ('t', 'c'):
    assert np.all(np.dot(a, a2.T) == gemmdot(a, a2, trans=trans))
assert np.dot(b, b) == gemmdot(b, b)

# Check gemmdot with complex arrays: 'c' now also conjugates the second
# operand, while 't' and 'n' do not.
a = a * (2 + 1.j)
a2 = a2 * (-1 + 3.j)
b = b * (3 - 2.j)
assert np.all(np.dot(a, b) == gemmdot(a, b))
a2_T = a2.T
assert np.all(np.dot(a, a2_T) == gemmdot(a, a2, trans='t'))
assert np.all(np.dot(a, a2_T.conj()) == gemmdot(a, a2, trans='c'))
assert np.dot(b, b) == gemmdot(b, b, trans='n')
assert np.dot(b, b.conj()) == gemmdot(b, b, trans='c')
scaled_a = 5.j * a
assert np.vdot(a, scaled_a) == dotc(a, scaled_a)

# Check gemm for transa='n': c starts as the reference contraction, and
# the call is expected to leave it identically zero (beta = -1).
a2 = np.arange(105).reshape(7, 5, 1, 3) * (-1. + 4.j) + 3.
c = np.tensordot(a, a2, axes=[1, 0])
gemm(1., a2, a, -1., c, 'n')
assert not np.any(c)

Пример #26

Показать файл

Файл: kernel.py Проект: qsnake/gpaw

def calculate_Kxc(gd, nt_sG, npw, Gvec_Gc, nG, vol,
                  bcell_cv, R_av, setups, D_asp):
    """Return the LDA kernel as an (npw, npw) complex matrix.

    The result is the sum of a grid ("soft") contribution and a per-atom PAW
    contribution, divided by ``vol``.  Atoms are split over ranks with
    ``rank == a % size`` and the PAW part is reduced with ``world.sum``
    before it is added to the soft part.
    """
    # Soft part: the density grid must have the shape given by nG.
    assert np.abs(nt_sG[0].shape - nG).sum() == 0

    xc = XC('LDA')

    fxc_sg = np.zeros_like(nt_sG)
    xc.calculate_fxc(gd, nt_sG, fxc_sg)
    fxc_g = fxc_sg[0]

    # Discrete Fourier transform of fxc(r), scaled by vol / (grid points).
    npoints = nG[0] * nG[1] * nG[2]
    fxc_fft_g = np.fft.fftn(fxc_g) * vol / npoints

    r_vg = gd.get_grid_point_coordinates()

    Kxc_GG = np.zeros((npw, npw), dtype=complex)
    # G - G' for every pair, stored for the PAW part below.
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_v = np.dot(dG_c, bcell_cv)
            dG_GGv[iG, jG] = dG_v
            inside_fft = (nG / 2 - np.abs(dG_c) > 0).all()
            if inside_fft:
                # Wrap negative components into the FFT index range.
                wrapped = (dG_c + nG) % nG
                Kxc_GG[iG, jG] = fxc_fft_g[wrapped[0], wrapped[1], wrapped[2]]
            else:
                # Not in the fft index: integrate explicitly on the grid.
                phase_g = gemmdot(dG_v, r_vg, beta=0.0)
                Kxc_GG[iG, jG] = gd.integrate(np.exp(-1j * phase_g) * fxc_g)

    # The PAW part
    KxcPAW_GG = np.zeros_like(Kxc_GG)
    for a, setup in enumerate(setups):
        if rank != a % size:
            # This atom belongs to another rank.
            continue

        xcc = setup.xc_correction
        rgd = xcc.rgd
        dv_g = rgd.dv_g

        D_sp = D_asp[a]
        nspins = len(D_sp)
        D_sLq = np.inner(D_sp, xcc.B_pqL.T)
        assert nspins == 1

        f_sg = rgd.empty(nspins)
        ft_sg = rgd.empty(nspins)

        n_sLg = np.dot(D_sLq, xcc.n_qg)
        nt_sLg = np.dot(D_sLq, xcc.nt_qg)
        # Add core density
        core_scale = sqrt(4 * pi) / nspins
        n_sLg[:, 0] += core_scale * xcc.nc_g
        nt_sLg[:, 0] += core_scale * xcc.nct_g

        coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))

        for n, Y_L in enumerate(xcc.Y_nL):
            w = weight_n[n]

            f_sg[:] = 0.0
            xc.calculate_fxc(rgd, np.dot(Y_L, n_sLg), f_sg)

            ft_sg[:] = 0.0
            xc.calculate_fxc(rgd, np.dot(Y_L, nt_sLg), ft_sg)

            dGR_GG = np.inner(dG_GGv, R_nv[n])
            coef_GGg = np.exp(-1j * np.outer(dGR_GG, rgd.r_g))
            coef_GGg = coef_GGg.reshape(npw, npw, rgd.ng)
            df_g = (f_sg[0] - ft_sg[0]) * dv_g
            KxcPAW_GG += w * np.dot(coef_GGg, df_g) * coefatoms_GG

    world.sum(KxcPAW_GG)
    Kxc_GG += KxcPAW_GG

    return Kxc_GG / vol

Пример #27

Показать файл

    def calculate_rkernel(self):
        """Compute the renormalized kernel and write one file per q-point.

        The kernel is evaluated on the real-space grid for every lattice
        vector in ``R_Rv`` with the grid points r' distributed over
        ``mpi.world``.  It is Fourier transformed first in r and then in r',
        summed over all ranks and divided by the cell volume.  Rank 0 writes
        the result for each ``iq`` to ``fhxc_<tag>_<xc>_<ecut>_<iq>.gpw``.

        Nothing is returned.
        """

        gd = self.gd
        ng_c = gd.N_c
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        ns = self.calc.wfs.nspins
        n_g = self.n_g  # density on rough grid

        fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
        qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
        flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
        Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

        ng = np.prod(ng_c)  # number of grid points
        r_vg = gd.get_grid_point_coordinates()
        rx_g = r_vg[0].flatten()
        ry_g = r_vg[1].flatten()
        rz_g = r_vg[2].flatten()

        prnt('    %d grid points and %d plane waves at the Gamma point' %
             (ng, self.pd.ngmax),
             file=self.fd)

        # Unit cells: lattice vectors R and the weight attached to each.
        R_Rv = []
        weight_R = []
        nR_v = self.unit_cells
        nR = np.prod(nR_v)
        for i in range(-nR_v[0] + 1, nR_v[0]):
            for j in range(-nR_v[1] + 1, nR_v[1]):
                for h in range(-nR_v[2] + 1, nR_v[2]):
                    R_Rv.append(i * cell_cv[0] + j * cell_cv[1] +
                                h * cell_cv[2])
                    weight_R.append((nR_v[0] - abs(i)) * (nR_v[1] - abs(j)) *
                                    (nR_v[2] - abs(h)) / float(nR))
        if nR > 1:
            # with more than one unit cell only the exchange kernel is
            # calculated on the grid. The bare Coulomb kernel is added
            # in PW basis and Vlocal_g only the exchange part
            dv = self.calc.density.gd.dv
            gc = (3 * dv / 4 / np.pi)**(1 / 3.)
            Vlocal_g -= 2 * np.pi * gc**2 / dv
            prnt('    Lattice point sampling: ' + '(%s x %s x %s)^2 ' %
                 (nR_v[0], nR_v[1], nR_v[2]) +
                 ' Reduced to %s lattice points' % len(R_Rv),
                 file=self.fd)

        # Ceiling division: grid points r' handled by each rank.
        l_g_size = -(-ng // mpi.world.size)
        l_g_range = range(mpi.world.rank * l_g_size,
                          min((mpi.world.rank + 1) * l_g_size, ng))

        fhxc_qsGr = {}
        for iq in range(len(self.ibzq_qc)):
            fhxc_qsGr[iq] = np.zeros(
                (ns, len(self.pd.G2_qG[iq]), len(l_g_range)), dtype=complex)

        t0 = time()
        # Invalid-value and divide warnings are silenced only for the
        # real-space loop; the context manager restores the previous
        # settings even if the loop raises.
        with np.errstate(invalid='ignore', divide='ignore'):
            # Loop over Lattice points
            for i, R_v in enumerate(R_Rv):
                # Loop over r'. f_rr and V_rr are functions of r
                # (dim. as r_vg[0])
                if i == 1:
                    prnt('      Finished 1 cell in %s seconds' %
                         int(time() - t0) +
                         ' - estimated %s seconds left' % int(
                             (len(R_Rv) - 1) * (time() - t0)),
                         file=self.fd)
                    self.fd.flush()
                if len(R_Rv) > 5:
                    # Floor division keeps the reporting interval an integer
                    # regardless of the division semantics in effect.
                    if (i + 1) % (len(R_Rv) // 5 + 1) == 0:
                        prnt('      Finished %s cells in %s seconds' %
                             (i, int(time() - t0)) +
                             ' - estimated %s seconds left' % int(
                                 (len(R_Rv) - i) * (time() - t0) / i),
                             file=self.fd)
                        self.fd.flush()
                for g in l_g_range:
                    rx = rx_g[g] + R_v[0]
                    ry = ry_g[g] + R_v[1]
                    rz = rz_g[g] + R_v[2]

                    # |r-r'-R_i|
                    rr = ((r_vg[0] - rx)**2 + (r_vg[1] - ry)**2 +
                          (r_vg[2] - rz)**2)**0.5

                    n_av = (n_g + n_g.flatten()[g]) / 2.
                    fx_g = ns * self.get_fxc_g(n_av, index=g)
                    qc_g = (-4 * np.pi * ns / fx_g)**0.5
                    x = qc_g * rr
                    osc_x = np.sin(x) - x * np.cos(x)
                    f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
                    if nR > 1:
                        # include only exchange part of the kernel here
                        V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
                    else:
                        # include the full kernel (also hartree part)
                        V_rr = (sici(x)[0] * 2 / np.pi) / rr

                    # Terms with r = r': for the R = 0 cell the entry at
                    # grid point g is replaced by the local value.
                    if (np.abs(R_v) < 0.001).all():
                        tmp_flat = f_rr.flatten()
                        tmp_flat[g] = flocal_g.flatten()[g]
                        f_rr = tmp_flat.reshape(ng_c)
                        tmp_flat = V_rr.flatten()
                        tmp_flat[g] = Vlocal_g.flatten()[g]
                        V_rr = tmp_flat.reshape(ng_c)
                        del tmp_flat

                    f_rr[np.where(n_av < self.density_cut)] = 0.0
                    V_rr[np.where(n_av < self.density_cut)] = 0.0

                    f_rr *= weight_R[i]
                    V_rr *= weight_R[i]

                    # r-r'-R_i
                    r_r = np.array([r_vg[0] - rx, r_vg[1] - ry,
                                    r_vg[2] - rz])

                    # Fourier transform of r
                    for iq, q in enumerate(self.ibzq_qc):
                        q_v = np.dot(q, icell_cv)
                        e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                        f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                        fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                        if ns == 2:
                            f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                            fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

            mpi.world.barrier()

        for iq, q in enumerate(self.ibzq_qc):
            npw = len(self.pd.G2_qG[iq])
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
            l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))

            if mpi.world.size > 1:
                # redistribute grid and plane waves in fhxc_qsGr[iq]
                bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
                bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
                # Block sizes use the same integer ceiling division as
                # l_g_size and l_pw_size above.
                bd1 = bg1.new_descriptor(npw, ng, npw,
                                         -(-ng // mpi.world.size))
                bd2 = bg2.new_descriptor(npw, ng,
                                         -(-npw // mpi.world.size), ng)

                fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
                if ns == 2:
                    Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

                r = Redistributor(bg1.comm, bd1, bd2)
                r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
                if ns == 2:
                    r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
            else:
                fhxc_Glr = fhxc_qsGr[iq][0]
                if ns == 2:
                    Koff_Glr = fhxc_qsGr[iq][1]

            # Fourier transform of r'
            for iG in range(len(l_pw_range)):
                f_g = fhxc_Glr[iG].reshape(ng_c)
                f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
                fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
                if ns == 2:
                    v_g = Koff_Glr[iG].reshape(ng_c)
                    v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                    fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

            if ns == 2:  # f_00 = f_11 and f_01 = f_10
                fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
                fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                if nR > 1:  # add Hartree kernel evaluated in PW basis
                    Gq2_G = self.pd.G2_qG[iq]
                    if (q == 0).all():
                        # Replace the first element on a copy so that the
                        # array stored in self.pd is left untouched.
                        Gq2_G = Gq2_G.copy()
                        Gq2_G[0] = 1.
                    vq_G = 4 * np.pi / Gq2_G
                    fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)

Пример #28

Показать файл

Файл: gw.py Проект: robwarm/gpaw-symm

    def get_self_energy(self, df, W_wGG):
        """Accumulate the self-energy and its frequency derivative.

        Two code paths exist, selected by ``self.hilbert_trans``:

        * method 1 (``hilbert_trans`` false) contracts ``W_wGG`` with the
          pair densities directly, with separate branches for ``self.ppa``,
          ``self.static`` and the general frequency-dependent case;
        * method 2 (``hilbert_trans`` true) first builds ``Cplus_Wg`` and
          ``Cminus_Wg`` from ``W_wGG`` and then interpolates linearly between
          the two frequency grid points around each energy difference.

        Parameters
        ----------
        df
            Object providing ``wcomm``, ``kd``, ``q_c``, ``kq_k``,
            ``optical_limit``, ``npw``, ``Nw`` and ``density_matrix``.
        W_wGG
            Array indexed as (w, G, G'); if ``self.static`` is set it is
            wrapped in a new leading axis of length 1 first.  In method 1 the
            ``[:, :, 0]`` and ``[:, 0, :]`` slices are rewritten in place
            from backups on every (n, m) iteration.

        Returns
        -------
        tuple
            ``(Sigma_skn, dSigma_skn)``, two real arrays of shape
            ``(nspins, gwnkpt, gwnband)`` summed over ``self.ncomm``.
        """

        Sigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband), dtype=float)
        dSigma_skn = np.zeros((self.nspins, self.gwnkpt, self.gwnband), dtype=float)

        wcomm = df.wcomm

        if self.static:
            W_wGG = np.array([W_wGG])

        if not self.hilbert_trans: #method 1
            Wbackup_wG0 = W_wGG[:,:,0].copy()
            Wbackup_w0G = W_wGG[:,0,:].copy()

        else: #method 2, perform Hilbert transform
            nG = np.shape(W_wGG)[1]
            coords = np.zeros(wcomm.size, dtype=int)
            # Number of (G, G') pairs per rank; the last rank takes the rest.
            nG_local = nG**2 // wcomm.size
            if wcomm.rank == wcomm.size - 1:
                nG_local = nG**2 - (wcomm.size - 1) * nG_local
            wcomm.all_gather(np.array([nG_local]), coords)
            W_Wg = SliceAlongFrequency(W_wGG, coords, wcomm)

            Nw = int(self.w_w[-1] / self.dw)

            w1_ww = np.zeros((Nw, df.Nw), dtype=complex)
            for iw in range(Nw):
                w1 = iw * self.dw
                w1_ww[iw] = 1./(w1 + self.w_w + 1j*self.eta_w) + 1./(w1 - self.w_w + 1j*self.eta_w)
                w1_ww[iw,0] -= 1./(w1 + 1j*self.eta_w[0]) # correct w'=0
                w1_ww[iw] *= self.dw_w

            # gemmdot returns the product arrays, so nothing is
            # pre-allocated for them here.
            Cplus_Wg = gemmdot(w1_ww, W_Wg, beta=0.0)
            Cminus_Wg = gemmdot(w1_ww.conj(), W_Wg, beta=0.0)

        for s in range(self.nspins):
            for i, k in enumerate(self.gwkpt_k): # k is bzk index
                if df.optical_limit:
                    kq_c = df.kd.bzk_kc[k]
                else:
                    kq_c = df.kd.bzk_kc[k] - df.q_c  # k - q

                kq = df.kd.where_is_q(kq_c, df.kd.bzk_kc)
                assert df.kq_k[kq] == k
                ibzkpt1 = df.kd.bz2ibz_k[k]
                ibzkpt2 = df.kd.bz2ibz_k[kq]

                for j, n in enumerate(self.bands):
                    for m in range(self.m_start, self.m_end):

                        # +1 for states above eFermi, -1 otherwise.
                        if self.e_skn[s][ibzkpt2, m] > self.eFermi:
                            sign = 1.
                        else:
                            sign = -1.

                        rho_G = df.density_matrix(m, n, kq, spin1=s, spin2=s)

                        if not self.hilbert_trans: #method 1
                            # Restore the slices saved before the loops.
                            W_wGG[:,:,0] = Wbackup_wG0
                            W_wGG[:,0,:] = Wbackup_w0G

                            # w1 = w - epsilon_m,k-q
                            w1 = self.e_skn[s][ibzkpt1,n] - self.e_skn[s][ibzkpt2,m]

                            if self.ppa:
                                # analytical expression for Plasmon Pole Approximation
                                W_GG = sign * W_wGG[0] * (1./(w1 + self.wt_GG - 1j*self.eta) -
                                                          1./(w1 - self.wt_GG + 1j*self.eta))
                                W_GG -= W_wGG[0] * (1./(w1 + self.wt_GG + 1j*self.eta*sign) +
                                                    1./(w1 - self.wt_GG + 1j*self.eta*sign))
                                W_G = gemmdot(W_GG, rho_G, beta=0.0)
                                Sigma_skn[s,i,j] += np.real(gemmdot(W_G, rho_G, alpha=self.alpha, beta=0.0,trans='c'))

                                W_GG = sign * W_wGG[0] * (1./(w1 - self.wt_GG + 1j*self.eta)**2 -
                                                          1./(w1 + self.wt_GG - 1j*self.eta)**2)
                                W_GG += W_wGG[0] * (1./(w1 - self.wt_GG + 1j*self.eta*sign)**2 +
                                                    1./(w1 + self.wt_GG + 1j*self.eta*sign)**2)
                                W_G = gemmdot(W_GG, rho_G, beta=0.0)
                                dSigma_skn[s,i,j] += np.real(gemmdot(W_G, rho_G, alpha=self.alpha, beta=0.0,trans='c'))

                            elif self.static:
                                W1_GG = W_wGG[0] - np.eye(df.npw)*self.Kc_GG
                                W2_GG = W_wGG[0]

                                # perform W_GG * np.outer(rho_G.conj(), rho_G).sum(GG)
                                W_G = gemmdot(W1_GG, rho_G, beta=0.0) # Coulomb Hole
                                Sigma_skn[s,i,j] += np.real(gemmdot(W_G, rho_G, alpha=self.alpha*pi/1j, beta=0.0,trans='c'))
                                if sign == -1:
                                    W_G = gemmdot(W2_GG, rho_G, beta=0.0) # Screened Exchange
                                    Sigma_skn[s,i,j] -= np.real(gemmdot(W_G, rho_G, alpha=2*self.alpha*pi/1j, beta=0.0,trans='c'))
                                del W1_GG, W2_GG, W_G, rho_G

                            else:
                                # perform W_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)
                                W_wG = gemmdot(W_wGG, rho_G, beta=0.0)
                                C_wlocal = gemmdot(W_wG, rho_G, alpha=self.alpha, beta=0.0,trans='c')
                                del W_wG, rho_G

                                C_w = np.zeros(df.Nw, dtype=complex)
                                wcomm.all_gather(C_wlocal, C_w)
                                del C_wlocal

                                # calculate self energy
                                w1_w = 1./(w1 - self.w_w + 1j*self.eta_w*sign) + 1./(w1 + self.w_w + 1j*self.eta_w*sign)
                                w1_w[0] -= 1./(w1 + 1j*self.eta_w[0]*sign) # correct w'=0
                                w1_w *= self.dw_w
                                Sigma_skn[s,i,j] += np.real(gemmdot(C_w, w1_w, beta=0.0))

                                # calculate derivate of self energy with respect to w
                                w1_w = 1./(w1 - self.w_w + 1j*self.eta_w*sign)**2 + 1./(w1 + self.w_w + 1j*self.eta_w*sign)**2
                                w1_w[0] -= 1./(w1 + 1j*self.eta_w[0]*sign)**2 # correct w'=0
                                w1_w *= self.dw_w
                                dSigma_skn[s,i,j] -= np.real(gemmdot(C_w, w1_w, beta=0.0))

                        else: #method 2
                            if not np.abs(self.e_skn[s][ibzkpt2,m] - self.e_skn[s][ibzkpt1,n]) < 1e-10:
                                sign *= np.sign(self.e_skn[s][ibzkpt1,n] - self.e_skn[s][ibzkpt2,m])

                            # find points on frequency grid
                            w0 = self.e_skn[s][ibzkpt1,n] - self.e_skn[s][ibzkpt2,m]
                            w0_id = np.abs(int(w0 / self.dw))
                            w1 = w0_id * self.dw
                            w2 = (w0_id + 1) * self.dw

                            # choose plus or minus, treat optical limit:
                            if sign == 1:
                                C_Wg = Cplus_Wg[w0_id:w0_id+2] # only two grid points needed for each w0
                            if sign == -1:
                                C_Wg = Cminus_Wg[w0_id:w0_id+2] # only two grid points needed for each w0

                            C_wGG = GatherOrbitals(C_Wg, coords, wcomm).copy()
                            del C_Wg

                            # special treat of w0 = 0 (degenerate states):
                            if w0_id == 0:
                                Cplustmp_GG = GatherOrbitals(Cplus_Wg[1], coords, wcomm).copy()
                                Cminustmp_GG = GatherOrbitals(Cminus_Wg[1], coords, wcomm).copy()

                            # perform C_wGG * np.outer(rho_G.conj(), rho_G).sum(GG)

                            if w0_id == 0:
                                Sw0_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                                Sw0 = np.real(gemmdot(Sw0_G, rho_G, alpha=self.alpha, beta=0.0, trans='c'))
                                Sw1_G = gemmdot(Cplustmp_GG, rho_G, beta=0.0)
                                Sw1 = np.real(gemmdot(Sw1_G, rho_G, alpha=self.alpha, beta=0.0, trans='c'))
                                Sw2_G = gemmdot(Cminustmp_GG, rho_G, beta=0.0)
                                Sw2 = np.real(gemmdot(Sw2_G, rho_G, alpha=self.alpha, beta=0.0, trans='c'))

                                Sigma_skn[s,i,j] += Sw0
                                dSigma_skn[s,i,j] += (Sw1 + Sw2)/(2*self.dw)

                            else:
                                Sw1_G = gemmdot(C_wGG[0], rho_G, beta=0.0)
                                Sw1 = np.real(gemmdot(Sw1_G, rho_G, alpha=self.alpha, beta=0.0, trans='c'))
                                Sw2_G = gemmdot(C_wGG[1], rho_G, beta=0.0)
                                Sw2 = np.real(gemmdot(Sw2_G, rho_G, alpha=self.alpha, beta=0.0, trans='c'))

                                # Linear interpolation between grid points
                                # w1 and w2; the slope gives the derivative.
                                Sw0 = (w2-np.abs(w0))/self.dw * Sw1 + (np.abs(w0)-w1)/self.dw * Sw2
                                Sigma_skn[s,i,j] += np.sign(self.e_skn[s][ibzkpt1,n] - self.e_skn[s][ibzkpt2,m]) * Sw0
                                dSigma_skn[s,i,j] += (Sw2 - Sw1)/self.dw

        self.ncomm.barrier()
        self.ncomm.sum(Sigma_skn)
        self.ncomm.sum(dSigma_skn)

        return Sigma_skn, dSigma_skn

Пример #29

Показать файл

Файл: base.py Проект: yihsuanliu/gpaw

    def initialize(self):
        """Set up k-points, cell, grid, eigenvalues, plane waves, projectors.

        Everything is read from ``self.calc`` and stored as attributes on
        ``self``; nothing is returned.

        ``self.eta`` and ``self.ecut`` are divided by ``Hartree`` in place
        and ``self.ftol`` by the number of k-points, so calling this method
        a second time rescales them again.
        """

        self.eta /= Hartree
        self.ecut /= Hartree

        calc = self.calc

        # kpoint init
        self.kd = kd = calc.wfs.kd
        self.bzk_kc = kd.bzk_kc
        self.ibzk_kc = kd.ibzk_kc
        self.nkpt = kd.nbzkpts
        self.ftol /= self.nkpt

        # band init
        if self.nbands is None:
            self.nbands = calc.wfs.nbands
        self.nvalence = calc.wfs.nvalence

        # cell init
        self.acell_cv = calc.atoms.cell / Bohr
        self.bcell_cv, self.vol, self.BZvol = get_primitive_cell(self.acell_cv)

        # grid init
        self.nG = calc.get_number_of_grid_points()
        self.nG0 = self.nG[0] * self.nG[1] * self.nG[2]
        gd = GridDescriptor(self.nG,
                            calc.wfs.gd.cell_cv,
                            pbc_c=True,
                            comm=serial_comm)
        self.gd = gd
        self.h_cv = gd.h_cv

        # obtain eigenvalues, occupations
        nibzkpt = kd.nibzkpts
        kweight_k = kd.weight_k

        # Keep eigenvalues that were already set on the instance and fall
        # back to the calculator only when the attribute is missing.  Only
        # AttributeError is caught so that other errors are not hidden.
        try:
            self.e_kn
        except AttributeError:
            self.printtxt('Use eigenvalues from the calculator.')
            self.e_kn = np.array(
                [calc.get_eigenvalues(kpt=k)
                 for k in range(nibzkpt)]) / Hartree
            self.printtxt('Eigenvalues(k=0) are:')
            print >> self.txt, self.e_kn[0] * Hartree
        self.f_kn = np.array([
            calc.get_occupation_numbers(kpt=k) / kweight_k[k]
            for k in range(nibzkpt)
        ]) / self.nkpt

        # k + q init
        assert self.q_c is not None
        self.qq_v = np.dot(self.q_c, self.bcell_cv)  # summation over c

        if self.optical_limit:
            # Every k-point maps onto itself and the phase factor is 1.
            kq_k = np.arange(self.nkpt)
            self.expqr_g = 1.
        else:
            r_vg = gd.get_grid_point_coordinates()  # (3, nG)
            qr_g = gemmdot(self.qq_v, r_vg, beta=0.0)
            self.expqr_g = np.exp(-1j * qr_g)
            del r_vg, qr_g
            kq_k = kd.find_k_plus_q(self.q_c)
        self.kq_k = kq_k

        # Plane wave init
        self.npw, self.Gvec_Gc, self.Gindex_G = set_Gvectors(
            self.acell_cv, self.bcell_cv, self.nG, self.ecut)

        # Projectors init
        setups = calc.wfs.setups
        pt = LFC(gd, [setup.pt_j for setup in setups],
                 dtype=calc.wfs.dtype,
                 forces=True)
        spos_ac = calc.atoms.get_scaled_positions()
        pt.set_k_points(self.bzk_kc)
        pt.set_positions(spos_ac)
        self.pt = pt

        # Printing calculation information
        self.print_stuff()

        return

Пример #30

Показать файл

Файл: gemv.py Проект: robwarm/gpaw-symm

# Time numreps repetitions of the product using np.dot.
t = time.time()
for n in range(numreps):
    BY1_pq = np.dot(B_pqL, Y_L)
t = time.time()-t
# Rate is numflop * numreps / t; it is divided by 1024**2 when printed.
performance = numflop*numreps/t
print 'dot    : %8.5f s, %8.5f Mflops' % (t,performance/1024**2.)
# BY0_pq comes from an earlier part of the script (not shown in this
# excerpt); every variant below is compared against it.
assert np.abs(BY0_pq-BY1_pq).max()<5e-12
del BY1_pq

# Same measurement for gemmdot, writing into a preallocated output array
# that is zeroed before every call.  Only run when test_gemmdot is set.
if test_gemmdot:
    BY2_pq = np.empty((P,Q), dtype)
    t = time.time()
    for n in range(numreps):
        BY2_pq.fill(0.0)
        gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
    t = time.time()-t
    performance = numflop*numreps/t
    print 'gemmdot: %8.5f s, %8.5f Mflops' % (t,performance/1024**2.)
    assert np.abs(BY0_pq-BY2_pq).max()<5e-12
    del BY2_pq

# Same measurement for gemv called with the 't' flag.
BY3_pq = np.empty((P,Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time()-t
performance = numflop*numreps/t
print 'gemvT  : %8.5f s, %8.5f Mflops' % (t,performance/1024**2.)
assert np.abs(BY0_pq-BY3_pq).max()<5e-12

Пример #31

Показать файл

def calculate_Kxc(gd,
                  nt_sG,
                  npw,
                  Gvec_Gc,
                  nG,
                  vol,
                  bcell_cv,
                  R_av,
                  setups,
                  D_asp,
                  functional='ALDA',
                  density_cut=None):
    """Return the ALDA kernel as an array of shape (nspins, npw, npw).

    The result is the sum of a grid ("soft") contribution and a per-atom PAW
    contribution, divided by ``vol``.

    ``functional='ALDA_X'`` selects an exchange-only closed-form expression
    and accepts one or two spin channels.  Any other value is passed to
    ``XC`` with its first character stripped (``'ALDA'`` -> ``'LDA'``) and
    requires exactly one spin channel.

    If ``density_cut`` is given, the soft kernel is set to zero wherever
    ``nt_sG * len(nt_sG)`` is below it.

    Atoms are split over ranks with ``rank == a % size`` and the PAW part is
    reduced with ``world.sum`` before it is added to the soft part.
    """

    # The soft part
    #assert np.abs(nt_sG[0].shape - nG).sum() == 0
    if functional == 'ALDA_X':
        x_only = True
        A_x = -3. / 4. * (3. / np.pi)**(1. / 3.)
        nspins = len(nt_sG)
        assert nspins in [1, 2]
        fxc_sg = nspins**(1. / 3.) * 4. / 9. * A_x * nt_sG**(-2. / 3.)
    else:
        assert len(nt_sG) == 1
        x_only = False
        fxc_sg = np.zeros_like(nt_sG)
        xc = XC(functional[1:])
        xc.calculate_fxc(gd, nt_sG, fxc_sg)

    if density_cut is not None:
        fxc_sg[np.where(nt_sG * len(nt_sG) < density_cut)] = 0.0

    # FFT fxc(r)
    nG0 = nG[0] * nG[1] * nG[2]
    tmp_sg = [np.fft.fftn(fxc_sg[s]) * vol / nG0 for s in range(len(nt_sG))]

    r_vg = gd.get_grid_point_coordinates()
    Kxc_sGG = np.zeros((len(fxc_sg), npw, npw), dtype=complex)
    for s in range(len(fxc_sg)):
        for iG in range(npw):
            for jG in range(npw):
                dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
                if (nG / 2 - np.abs(dG_c) > 0).all():
                    # Wrap negative components into the FFT index range.
                    index = (dG_c + nG) % nG
                    Kxc_sGG[s, iG, jG] = tmp_sg[s][index[0], index[1],
                                                   index[2]]
                else:  # not in the fft index
                    dG_v = np.dot(dG_c, bcell_cv)
                    dGr_g = gemmdot(dG_v, r_vg, beta=0.0)
                    Kxc_sGG[s, iG, jG] = gd.integrate(
                        np.exp(-1j * dGr_g) * fxc_sg[s])

    # The PAW part
    KxcPAW_sGG = np.zeros_like(Kxc_sGG)
    dG_GGv = np.zeros((npw, npw, 3))
    for iG in range(npw):
        for jG in range(npw):
            dG_c = Gvec_Gc[iG] - Gvec_Gc[jG]
            dG_GGv[iG, jG] = np.dot(dG_c, bcell_cv)

    for a, setup in enumerate(setups):
        if rank == a % size:
            rgd = setup.xc_correction.rgd
            n_qg = setup.xc_correction.n_qg
            nt_qg = setup.xc_correction.nt_qg
            nc_g = setup.xc_correction.nc_g
            nct_g = setup.xc_correction.nct_g
            Y_nL = setup.xc_correction.Y_nL
            dv_g = rgd.dv_g

            D_sp = D_asp[a]
            B_pqL = setup.xc_correction.B_pqL
            D_sLq = np.inner(D_sp, B_pqL.T)
            nspins = len(D_sp)

            f_sg = rgd.empty(nspins)
            ft_sg = rgd.empty(nspins)

            n_sLg = np.dot(D_sLq, n_qg)
            nt_sLg = np.dot(D_sLq, nt_qg)

            # Add core density
            n_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nc_g
            nt_sLg[:, 0] += np.sqrt(4. * np.pi) / nspins * nct_g

            coefatoms_GG = np.exp(-1j * np.inner(dG_GGv, R_av[a]))
            for n, Y_L in enumerate(Y_nL):
                w = weight_n[n]
                f_sg[:] = 0.0
                n_sg = np.dot(Y_L, n_sLg)
                if x_only:
                    f_sg = nspins * (4 / 9.) * A_x * (nspins * n_sg)**(-2 / 3.)
                else:
                    xc.calculate_fxc(rgd, n_sg, f_sg)

                ft_sg[:] = 0.0
                nt_sg = np.dot(Y_L, nt_sLg)
                if x_only:
                    ft_sg = nspins * (4 / 9.) * (A_x *
                                                 (nspins * nt_sg)**(-2 / 3.))
                else:
                    xc.calculate_fxc(rgd, nt_sg, ft_sg)

                # This factor depends only on the direction index n, so it
                # is computed once here rather than once per radial point.
                phase_GG = -1j * np.inner(dG_GGv, R_nv[n])
                for i in range(len(rgd.r_g)):
                    coef_GG = np.exp(phase_GG * rgd.r_g[i])
                    for s in range(len(f_sg)):
                        KxcPAW_sGG[s] += w * np.dot(coef_GG,
                                                    (f_sg[s, i] - ft_sg[s, i])
                                                    * dv_g[i]) * coefatoms_GG

    world.sum(KxcPAW_sGG)
    Kxc_sGG += KxcPAW_sGG

    return Kxc_sGG / vol

Пример #32

Показать файл

Файл: fxc.py Проект: robwarm/gpaw-symm

    def calculate_rkernel(self):

        gd = self.gd
        ng_c = gd.N_c
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        ns = self.calc.wfs.nspins
        n_g = self.n_g   # density on rough grid

        fx_g = ns * self.get_fxc_g(n_g)   # local exchange kernel
        qc_g = (-4 * np.pi * ns / fx_g)**0.5   # cutoff functional
        flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)   # ren. x-kernel for r=r'
        Vlocal_g = 2 * qc_g / np.pi   # ren. Hartree kernel for r=r'

        ng = np.prod(ng_c)   # number of grid points
        r_vg = gd.get_grid_point_coordinates()
        rx_g = r_vg[0].flatten()
        ry_g = r_vg[1].flatten()
        rz_g = r_vg[2].flatten()

        prnt('    %d grid points and %d plane waves at the Gamma point' %
             (ng, self.pd.ngmax), file=self.fd)

        # Unit cells
        R_Rv = []
        weight_R = []
        nR_v = self.unit_cells
        nR = np.prod(nR_v)
        for i in range(-nR_v[0] + 1, nR_v[0]):
            for j in range(-nR_v[1] + 1, nR_v[1]):
                for h in range(-nR_v[2] + 1, nR_v[2]):
                    R_Rv.append(i * cell_cv[0] +
                                j * cell_cv[1] +
                                h * cell_cv[2])
                    weight_R.append((nR_v[0] - abs(i)) *
                                    (nR_v[1] - abs(j)) *
                                    (nR_v[2] - abs(h)) / float(nR))
        if nR > 1:
            # with more than one unit cell only the exchange kernel is
            # calculated on the grid. The bare Coulomb kernel is added
            # in PW basis and Vlocal_g only the exchange part
            dv = self.calc.density.gd.dv
            gc = (3 * dv / 4 / np.pi)**(1 / 3.)
            Vlocal_g -= 2 * np.pi * gc**2 / dv
            prnt('    Lattice point sampling: ' +
                 '(%s x %s x %s)^2 ' % (nR_v[0], nR_v[1], nR_v[2]) +
                 ' Reduced to %s lattice points' % len(R_Rv), file=self.fd)

        l_g_size = -(-ng // mpi.world.size)
        l_g_range = range(mpi.world.rank * l_g_size,
                          min((mpi.world.rank+1) * l_g_size, ng))

        fhxc_qsGr = {}
        for iq in range(len(self.ibzq_qc)):
            fhxc_qsGr[iq] = np.zeros((ns, len(self.pd.G2_qG[iq]),
                                      len(l_g_range)), dtype=complex)

        inv_error = np.seterr()
        np.seterr(invalid='ignore')
        np.seterr(divide='ignore')

        t0 = time()
        # Loop over Lattice points
        for i, R_v in enumerate(R_Rv):
            # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
            if i == 1:
                prnt('      Finished 1 cell in %s seconds' % int(time() - t0) +
                     ' - estimated %s seconds left' %
                     int((len(R_Rv) - 1) * (time() - t0)), 
                     file=self.fd)
                self.fd.flush()
            if len(R_Rv) > 5:
                if (i+1) % (len(R_Rv) / 5 + 1) == 0:
                    prnt('      Finished %s cells in %s seconds'
                         % (i, int(time() - t0))
                         + ' - estimated %s seconds left'
                         % int((len(R_Rv) - i) * (time() - t0) / i), 
                         file=self.fd)
                    self.fd.flush()
            for g in l_g_range:
                rx = rx_g[g] + R_v[0]
                ry = ry_g[g] + R_v[1]
                rz = rz_g[g] + R_v[2]

                # |r-r'-R_i|
                rr = ((r_vg[0] - rx)**2 +
                      (r_vg[1] - ry)**2 +
                      (r_vg[2] - rz)**2)**0.5

                n_av = (n_g + n_g.flatten()[g]) / 2.
                fx_g = ns * self.get_fxc_g(n_av, index=g)
                qc_g = (-4 * np.pi * ns / fx_g)**0.5
                x = qc_g * rr
                osc_x = np.sin(x) - x*np.cos(x)
                f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
                if nR > 1:   # include only exchange part of the kernel here
                    V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
                else:        # include the full kernel (also hartree part)
                    V_rr = (sici(x)[0] * 2 / np.pi) / rr

                # Terms with r = r'
                if (np.abs(R_v) < 0.001).all():
                    tmp_flat = f_rr.flatten()
                    tmp_flat[g] = flocal_g.flatten()[g]
                    f_rr = tmp_flat.reshape(ng_c)
                    tmp_flat = V_rr.flatten()
                    tmp_flat[g] = Vlocal_g.flatten()[g]
                    V_rr = tmp_flat.reshape(ng_c)
                    del tmp_flat

                f_rr[np.where(n_av < self.density_cut)] = 0.0
                V_rr[np.where(n_av < self.density_cut)] = 0.0

                f_rr *= weight_R[i]
                V_rr *= weight_R[i]

                # r-r'-R_i
                r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

                # Fourier transform of r
                for iq, q in enumerate(self.ibzq_qc):
                    q_v = np.dot(q, icell_cv)
                    e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                    f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                    if ns == 2:
                        f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                        fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

        mpi.world.barrier()

        np.seterr(**inv_error)

        for iq, q in enumerate(self.ibzq_qc):
            npw = len(self.pd.G2_qG[iq])
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
            l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))

            if mpi.world.size > 1:
                # redistribute grid and plane waves in fhxc_qsGr[iq]
                bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
                bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
                bd1 = bg1.new_descriptor(npw, ng, npw, - (-ng / mpi.world.size))
                bd2 = bg2.new_descriptor(npw, ng, -(-npw / mpi.world.size), ng)

                fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
                if ns == 2:
                    Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

                r = Redistributor(bg1.comm, bd1, bd2)
                r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
                if ns == 2:
                    r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
            else:
                fhxc_Glr = fhxc_qsGr[iq][0]
                if ns == 2:
                    Koff_Glr = fhxc_qsGr[iq][1]

            # Fourier transform of r'
            for iG in range(len(l_pw_range)):
                f_g = fhxc_Glr[iG].reshape(ng_c)
                f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
                fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
                if ns == 2:
                    v_g = Koff_Glr[iG].reshape(ng_c)
                    v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                    fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

            if ns == 2:  # f_00 = f_11 and f_01 = f_10
                fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
                fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                if nR > 1:  # add Hartree kernel evaluated in PW basis
                    Gq2_G = self.pd.G2_qG[iq]
                    if (q == 0).all():
                        Gq2_G[0] = 1.
                    vq_G = 4 * np.pi / Gq2_G
                    fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)

Пример #33

Показать файл

def write_overlaps(calc, seed=None, spin=0, v_knm=None):
    """Write overlap matrices between neighbouring k-points to ``<seed>.mmn``.

    The list of neighbouring k-points is read from ``<seed>.nnkp`` (the
    lines following ``begin nnkpts``); the file naming matches the
    Wannier90 conventions.  For every k-point and each of its ``Nb``
    neighbours the overlap matrix returned by ``get_overlap`` is written,
    one ``real imag`` pair per line.

    Parameters
    ----------
    calc:
        Calculator object providing ``atoms``, ``wfs`` and
        ``get_bz_k_points()``.
    seed:
        Common prefix of the ``.nnkp`` input and ``.mmn`` output files.
        Defaults to the chemical formula of ``calc.atoms``.
    spin:
        Spin index used for wave functions and projections in the
        non-spinor case.
    v_knm:
        If given, spinor wave functions and projections are built from
        ``v_knm[ik]`` via the spin-orbit helpers; otherwise the plain
        wave functions of ``calc`` are used.

    Raises
    ------
    ValueError
        If ``<seed>.nnkp`` contains no ``begin nnkpts`` block.
    """
    if seed is None:
        seed = calc.atoms.get_chemical_formula()

    spinors = v_knm is not None

    bands = get_bands(seed)
    Nn = len(bands)
    kpts_kc = calc.get_bz_k_points()
    Nk = len(kpts_kc)

    # Read the whole neighbour file up front so the handle is closed
    # before any heavy work starts.
    with open(seed + '.nnkp', 'r') as nnkp:
        lines = nnkp.readlines()

    # Locate the nnkpts block: the line after 'begin nnkpts' holds the
    # number of neighbours, the neighbour table starts one line later.
    Nb = None
    for il, line in enumerate(lines):
        if line.split()[:2] == ['begin', 'nnkpts']:
            # int() instead of eval(): the value is a plain integer and
            # file content must not be executed as code.
            Nb = int(lines[il + 1].split()[0])
            i0 = il + 2
            break
    if Nb is None:
        raise ValueError("no 'begin nnkpts' block found in %s.nnkp" % seed)

    wfs = calc.wfs
    icell_cv = (2 * np.pi) * np.linalg.inv(wfs.gd.cell_cv).T
    r_g = wfs.gd.get_grid_point_coordinates()
    # Number of grid points, doubled when the spin index is part of G.
    Ng = np.prod(np.shape(r_g)[1:]) * (spinors + 1)

    dO_aii = []
    for ia in wfs.kpt_u[0].P_ani.keys():
        dO_ii = wfs.setups[ia].dO_ii
        if spinors:
            # Spinor projections require doubling of the (identical) orbitals
            dO_jj = np.zeros((2 * len(dO_ii), 2 * len(dO_ii)), complex)
            dO_jj[::2, ::2] = dO_ii
            dO_jj[1::2, 1::2] = dO_ii
            dO_aii.append(dO_jj)
        else:
            dO_aii.append(dO_ii)

    u_knG = []
    for ik in range(Nk):
        if spinors:
            # For spinors, G denotes spin and grid: G = (s, gx, gy, gz)
            u_nG = get_spinorbit_wavefunctions(calc, ik, v_knm[ik])
            u_knG.append(u_nG[bands])
        else:
            # For non-spinors, G denotes grid: G = (gx, gy, gz)
            u_knG.append(
                np.array(
                    [wfs.get_wave_function_array(n, ik, spin) for n in bands]))

    P_kani = []
    for ik in range(Nk):
        if spinors:
            P_kani.append(get_spinorbit_projections(calc, ik, v_knm[ik]))
        else:
            P_kani.append(wfs.kpt_u[spin * Nk + ik].P_ani)

    # The context manager guarantees the output is flushed and closed even
    # if an overlap evaluation fails part-way through.
    with open(seed + '.mmn', 'w') as f:
        print('Kohn-Sham input generated from GPAW calculation', file=f)
        print('%10d %6d %6d' % (Nn, Nk, Nb), file=f)

        for ik1 in range(Nk):
            u1_nG = u_knG[ik1]
            for ib in range(Nb):
                # b denotes nearest neighbor k-points
                fields = lines[i0 + ik1 * Nb + ib].split()
                ik2 = int(fields[1]) - 1  # file indices are 1-based
                u2_nG = u_knG[ik2]

                G_c = np.array([int(fields[i]) for i in range(2, 5)])
                bG_c = kpts_kc[ik2] - kpts_kc[ik1] + G_c
                bG_v = np.dot(bG_c, icell_cv)
                u2_nG = u2_nG * np.exp(-1.0j * gemmdot(bG_v, r_g, beta=0.0))
                M_mm = get_overlap(calc, bands,
                                   np.reshape(u1_nG, (len(u1_nG), Ng)),
                                   np.reshape(u2_nG, (len(u2_nG), Ng)),
                                   P_kani[ik1], P_kani[ik2], dO_aii, bG_v)
                indices = (ik1 + 1, ik2 + 1, G_c[0], G_c[1], G_c[2])
                print('%3d %3d %4d %3d %3d' % indices, file=f)
                # The first index of M_mm runs fastest in the output.
                for m1 in range(len(M_mm)):
                    for m2 in range(len(M_mm)):
                        M = M_mm[m2, m1]
                        print('%20.12f %20.12f' % (M.real, M.imag), file=f)

Пример #34

Показать файл

Файл: lcao.py Проект: yihsuanliu/gpaw

    def calculate_forces_by_kpoint(self, kpt, hamiltonian,
                                   F_av, tci, P_aqMi,
                                   dThetadR_vMM, dTdR_vMM, dPdR_aqvMi):
        """Add the LCAO force contributions of one k-point to ``F_av``.

        Four terms are evaluated in turn: kinetic, potential, density
        matrix (basis overlap plus PAW correction) and atomic density.
        Their sum is added in place to ``F_av``; nothing is returned.

        ``kpt`` supplies the indices ``q`` and ``s`` and either ``rho_MM``
        or, when that is None, ``f_n``, ``eps_n`` and ``C_nM`` (plus the
        optional ``c_on``/``ne_o`` pair).  ``dTdR_vMM``, ``dThetadR_vMM``
        and ``dPdR_aqvMi`` hold the derivatives of the kinetic, overlap and
        projector matrices.  ``tci`` and ``P_aqMi`` are accepted but not
        read in this method; projector overlaps come from ``self.P_aqMi``.
        """
        k, q = kpt.k, kpt.q
        ksl = self.ksl
        mynao, nao = ksl.mynao, ksl.nao
        dtype = self.dtype
        Mstart, Mstop = ksl.Mstart, ksl.Mstop

        bfs = self.basis_functions
        my_atoms = bfs.my_atom_indices
        all_atoms = bfs.atom_indices

        def orbital_rows(atoms):
            # Lazily yield (atom, first row, one past last row) with the
            # rows counted relative to Mstart.
            for atom in atoms:
                first = bfs.M_a[atom] - Mstart
                yield atom, first, first + self.setups[atom].niAO

        # E_{mu nu} = sum_{x z} S^-1_{mu x} H_{x z} rho_{z nu}
        #           = sum_n c*_{n mu} eps_n f_n c_{n nu}
        #
        # The transpose of E is what gets stored.  The S^-1 H rho form is
        # taken when kpt.rho_MM exists, the coefficient form otherwise.
        self.timer.start('LCAO forces: initial')
        if kpt.rho_MM is not None:
            H_MM = self.eigensolver.calculate_hamiltonian_matrix(
                hamiltonian, self, kpt)
            tri2full(H_MM)
            S_MM = self.S_qMM[q].copy()
            tri2full(S_MM)
            Hrho_MM = gemmdot(H_MM, kpt.rho_MM)
            ET_MM = np.linalg.solve(S_MM, Hrho_MM).T.copy()
            del S_MM, H_MM, Hrho_MM
            rhoT_MM = kpt.rho_MM.T.copy()
        else:
            C_nM = kpt.C_nM
            rhoT_MM = ksl.get_transposed_density_matrix(kpt.f_n, C_nM)
            ET_MM = ksl.get_transposed_density_matrix(kpt.f_n * kpt.eps_n,
                                                      C_nM)
            if hasattr(kpt, 'c_on'):
                assert self.bd.comm.size == 1
                nbands = self.bd.mynbands
                d_nn = np.zeros((nbands, nbands), dtype=C_nM.dtype)
                for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                    d_nn += ne * np.outer(c_n.conj(), c_n)
                rhoT_MM += ksl.get_transposed_density_matrix_delta(d_nn,
                                                                   C_nM)
                ET_MM += ksl.get_transposed_density_matrix_delta(
                    d_nn * kpt.eps_n, C_nM)
        self.timer.stop('LCAO forces: initial')

        # Kinetic energy contribution:
        #
        #   F^a += 2 Re sum_{mu in a; nu} (dT_{mu nu} / dR_{mu nu}) rho_{nu mu}
        #
        Fkin_av = np.zeros_like(F_av)
        dTrhoT_vMM = (dTdR_vMM * rhoT_MM[np.newaxis]).real
        for a, M1, M2 in orbital_rows(my_atoms):
            rows_vMM = dTrhoT_vMM[:, M1:M2]
            Fkin_av[a] = 2 * rows_vMM.sum(-1).sum(-1)
        del dTrhoT_vMM

        # Potential contribution:
        #
        #   F^a += -2 Re sum_{mu in a; nu}
        #              int (dPhi_mu(r) / dR_a) vt(r) Phi_nu(r) dr  rho_{nu mu}
        #
        self.timer.start('LCAO forces: potential')
        Fpot_av = np.zeros_like(F_av)
        vt_G = hamiltonian.vt_sG[kpt.s]
        DVt_vMM = np.zeros((3, mynao, nao), dtype)
        # DVt_vMM holds dPhi(r) / dr = - dPhi(r) / dR^a, hence the plus sign
        # in the loop below.
        bfs.calculate_potential_matrix_derivative(vt_G, DVt_vMM, q)

        for a, M1, M2 in orbital_rows(all_atoms):
            rows = slice(M1, M2)
            for v in range(3):
                Fpot_av[a, v] = 2 * (DVt_vMM[v, rows]
                                     * rhoT_MM[rows]).real.sum()
        del DVt_vMM
        self.timer.stop('LCAO forces: potential')

        # Density matrix contribution due to basis overlap:
        #
        #   F^a += -2 Re sum_{mu in a; nu}
        #              (dTheta_{mu nu} / dR_{mu nu}) E_{nu mu}
        #
        Frho_av = np.zeros_like(F_av)
        dThetaE_vMM = (dThetadR_vMM * ET_MM[np.newaxis]).real
        for a, M1, M2 in orbital_rows(my_atoms):
            rows_vMM = dThetaE_vMM[:, M1:M2]
            Frho_av[a] = -2 * rows_vMM.sum(-1).sum(-1)
        del dThetaE_vMM

        # Density matrix contribution from PAW correction:
        #
        #   F^a += 2 Re sum_{mu nu} Z^a_{mu nu} E_{nu mu}
        #          - 2 Re sum_{b; mu in a; nu} Z^b_{mu nu} E_{nu mu}
        #
        # with
        #
        #   Z^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu}) dS^b_{ij} P^b_{j nu}
        #
        self.timer.start('LCAO forces: paw correction')
        dPdR_avMi = {a: dPdR_aqvMi[a][q] for a in my_atoms}
        work_MM = np.zeros((mynao, nao), dtype)
        ZE_MM = None
        for b in my_atoms:
            setup = self.setups[b]
            dO_ii = np.asarray(setup.dO_ii, dtype)
            dOP_iM = np.zeros((setup.ni, nao), dtype)
            gemm(1.0, self.P_aqMi[b][q], dO_ii, 0.0, dOP_iM, 'c')
            for v in range(3):
                # work_MM is overwritten in place for every (b, v) pair.
                gemm(1.0, dOP_iM, dPdR_avMi[b][v][Mstart:Mstop], 0.0,
                     work_MM, 'n')
                ZE_MM = (work_MM * ET_MM).real
                for a, M1, M2 in orbital_rows(all_atoms):
                    term = 2 * ZE_MM[M1:M2].sum()
                    Frho_av[a, v] -= term  # the "b; mu in a; nu" term
                    Frho_av[b, v] += term  # the "mu nu" term
        del work_MM, ZE_MM
        self.timer.stop('LCAO forces: paw correction')

        # Atomic density contribution:
        #
        #   F^a += -2 Re sum_{mu nu} A^a_{mu nu} rho_{nu mu}
        #          + 2 Re sum_{b; mu in a; nu} A^b_{mu nu} rho_{nu mu}
        #
        # with
        #
        #   A^b_{mu nu} = sum_{ij} (dP^b*_{i mu} / dR_{b mu}) dH^b_{ij} P^b_{j nu}
        #
        self.timer.start('LCAO forces: atomic density')
        Fatom_av = np.zeros_like(F_av)
        for b in my_atoms:
            H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]), dtype)
            HP_iM = gemmdot(H_ii, np.conj(self.P_aqMi[b][q].T))
            for v in range(3):
                dPdR_Mi = dPdR_avMi[b][v][Mstart:Mstop]
                ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_MM).real
                for a, M1, M2 in orbital_rows(all_atoms):
                    term = 2 * ArhoT_MM[M1:M2].sum()
                    Fatom_av[a, v] += term  # the "b; mu in a; nu" term
                    Fatom_av[b, v] -= term  # the "mu nu" term
        self.timer.stop('LCAO forces: atomic density')

        # Accumulate in place so the caller's array receives the result.
        F_av += Fkin_av + Fpot_av + Frho_av + Fatom_av

Пример #35

Показать файл

Файл: lcao.py Проект: Xu-Kai/lotsofcoresbook2code

    def calculate_forces(self, hamiltonian, F_av):
        self.timer.start('LCAO forces')

        spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
        ksl = self.ksl
        nao = ksl.nao
        mynao = ksl.mynao
        nq = len(self.kd.ibzk_qc)
        dtype = self.dtype
        tci = self.tci
        gd = self.gd
        bfs = self.basis_functions

        Mstart = ksl.Mstart
        Mstop = ksl.Mstop

        from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
        isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

        if not isblacs:
            self.timer.start('TCI derivative')
            dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dPdR_aqvMi = {}
            for a in self.basis_functions.my_atom_indices:
                ni = self.setups[a].ni
                dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
            tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                     dPdR_aqvMi)
            gd.comm.sum(dThetadR_qvMM)
            gd.comm.sum(dTdR_qvMM)
            self.timer.stop('TCI derivative')

            my_atom_indices = bfs.my_atom_indices
            atom_indices = bfs.atom_indices

            def _slices(indices):
                for a in indices:
                    M1 = bfs.M_a[a] - Mstart
                    M2 = M1 + self.setups[a].nao
                    if M2 > 0:
                        yield a, max(0, M1), M2

            def slices():
                return _slices(atom_indices)

            def my_slices():
                return _slices(my_atom_indices)

        #
        #         -----                    -----
        #          \    -1                  \    *
        # E      =  )  S     H    rho     =  )  c     eps  f  c
        #  mu nu   /    mu x  x z    z nu   /    n mu    n  n  n nu
        #         -----                    -----
        #          x z                       n
        #
        # We use the transpose of that matrix.  The first form is used
        # if rho is given, otherwise the coefficients are used.
        self.timer.start('Initial')

        rhoT_uMM = []
        ET_uMM = []

        if not isblacs:
            if self.kpt_u[0].rho_MM is None:
                self.timer.start('Get density matrix')
                for kpt in self.kpt_u:
                    rhoT_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n, kpt.C_nM)
                    rhoT_uMM.append(rhoT_MM)
                    ET_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n * kpt.eps_n, kpt.C_nM)
                    ET_uMM.append(ET_MM)

                    if hasattr(kpt, 'c_on'):
                        # XXX does this work with BLACS/non-BLACS/etc.?
                        assert self.bd.comm.size == 1
                        d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                        dtype=kpt.C_nM.dtype)
                        for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                            d_nn += ne * np.outer(c_n.conj(), c_n)
                        rhoT_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn, kpt.C_nM)
                        ET_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn * kpt.eps_n, kpt.C_nM)
                self.timer.stop('Get density matrix')
            else:
                rhoT_uMM = []
                ET_uMM = []
                for kpt in self.kpt_u:
                    H_MM = self.eigensolver.calculate_hamiltonian_matrix(\
                        hamiltonian, self, kpt)
                    tri2full(H_MM)
                    S_MM = kpt.S_MM.copy()
                    tri2full(S_MM)
                    ET_MM = np.linalg.solve(S_MM,
                                            gemmdot(H_MM,
                                                    kpt.rho_MM)).T.copy()
                    del S_MM, H_MM
                    rhoT_MM = kpt.rho_MM.T.copy()
                    rhoT_uMM.append(rhoT_MM)
                    ET_uMM.append(ET_MM)
        self.timer.stop('Initial')

        if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
            from gpaw.blacs import BlacsGrid, Redistributor

            def get_density_matrix(f_n, C_nM, redistributor):
                rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                               C_nM).conj()
                rho_mm = redistributor.redistribute(rho1_mm)
                return rho_mm

            pcutoff_a = [
                max([pt.get_cutoff() for pt in setup.pt_j])
                for setup in self.setups
            ]
            phicutoff_a = [
                max([phit.get_cutoff() for phit in setup.phit_j])
                for setup in self.setups
            ]

            # XXX should probably use bdsize x gdsize instead
            # That would be consistent with some existing grids
            grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                             self.bd.comm.size)

            blocksize1 = -(-nao // grid.nprow)
            blocksize2 = -(-nao // grid.npcol)
            # XXX what are rows and columns actually?
            desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

            rhoT_umm = []
            ET_umm = []
            redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                self.timer.start('Get density matrix')
                rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
                rhoT_umm.append(rhoT_mm)
                self.timer.stop('Get density matrix')

                self.timer.start('Potential')
                rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_mM, kpt.q)
                del rhoT_mM
                self.timer.stop('Potential')

            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                           redistributor)
                ET_umm.append(ET_mm)
            self.timer.stop('Get density matrix')

            M1start = blocksize1 * grid.myrow
            M2start = blocksize2 * grid.mycol

            M1stop = min(M1start + blocksize1, nao)
            M2stop = min(M2start + blocksize2, nao)

            m1max = M1stop - M1start
            m2max = M2stop - M2start

        if not isblacs:
            # Kinetic energy contribution
            #
            #           ----- d T
            #  a         \       mu nu
            # F += 2 Re   )   -------- rho
            #            /    d R         nu mu
            #           -----    mu nu
            #        mu in a; nu
            #
            Fkin_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] *
                                rhoT_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Fkin_av[a, :] += \
                        2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM

            # Density matrix contribution due to basis overlap
            #
            #            ----- d Theta
            #  a          \           mu nu
            # F  += -2 Re  )   ------------  E
            #             /        d R        nu mu
            #            -----        mu nu
            #         mu in a; nu
            #
            Ftheta_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dThetadRE_vMM = (dThetadR_qvMM[kpt.q] *
                                 ET_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Ftheta_av[a, :] += \
                        -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

        if isblacs:
            from gpaw.lcao.overlap import TwoCenterIntegralCalculator
            self.timer.start('Prepare TCI loop')
            M_a = bfs.M_a

            Fkin2_av = np.zeros_like(F_av)
            Ftheta2_av = np.zeros_like(F_av)

            cell_cv = tci.atoms.cell
            spos_ac = tci.atoms.get_scaled_positions() % 1.0

            overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                      derivative=False)

            # XXX this is not parallel *AT ALL*.
            self.timer.start('Get neighbors')
            nl = tci.atompairs.pairs.neighbors
            r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
            atompairs = r_and_offset_aao.keys()
            atompairs.sort()
            self.timer.stop('Get neighbors')

            T_expansions = tci.T_expansions
            Theta_expansions = tci.Theta_expansions
            P_expansions = tci.P_expansions
            nq = len(self.kd.ibzk_qc)

            dH_asp = hamiltonian.dH_asp

            self.timer.start('broadcast dH')
            alldH_asp = {}
            for a in range(len(self.setups)):
                gdrank = bfs.sphere_a[a].rank
                if gdrank == gd.rank:
                    dH_sp = dH_asp[a]
                else:
                    ni = self.setups[a].ni
                    dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                gd.comm.broadcast(dH_sp, gdrank)
                # okay, now everyone gets copies of dH_sp
                alldH_asp[a] = dH_sp
            self.timer.stop('broadcast dH')

            # This will get sort of hairy.  We need to account for some
            # three-center overlaps, such as:
            #
            #         a1
            #      Phi   ~a3    a3  ~a3     a2     a2,a1
            #   < ----  |p  > dH   <p   |Phi  > rho
            #      dR
            #
            # To this end we will loop over all pairs of atoms (a1, a3),
            # and then a sub-loop over (a3, a2).
            from gpaw.lcao.overlap import DerivativeAtomicDisplacement

            class Displacement(DerivativeAtomicDisplacement):
                def __init__(self, a1, a2, R_c, offset):
                    phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                    offset)
                    DerivativeAtomicDisplacement.__init__(
                        self, None, a1, a2, R_c, offset, phases)

            # Cache of Displacement objects with spherical harmonics with
            # evaluated spherical harmonics.
            disp_aao = {}

            def get_displacements(a1, a2, maxdistance):
                # XXX the way maxdistance is handled it can lead to
                # bad caching when different maxdistances are passed
                # to subsequent calls with same pair of atoms
                disp_o = disp_aao.get((a1, a2))
                if disp_o is None:
                    disp_o = []
                    for R_c, offset in r_and_offset_aao[(a1, a2)]:
                        if np.linalg.norm(R_c) > maxdistance:
                            continue
                        disp = Displacement(a1, a2, R_c, offset)
                        disp_o.append(disp)
                    disp_aao[(a1, a2)] = disp_o
                return [disp for disp in disp_o if disp.r < maxdistance]

            self.timer.stop('Prepare TCI loop')
            self.timer.start('Not so complicated loop')

            for (a1, a2) in atompairs:
                if a1 >= a2:
                    # Actually this leads to bad load balance.
                    # We should take a1 > a2 or a1 < a2 equally many times.
                    # Maybe decide which of these choices
                    # depending on whether a2 % 1 == 0
                    continue

                m1start = M_a[a1] - M1start
                m2start = M_a[a2] - M2start
                if m1start >= blocksize1 or m2start >= blocksize2:
                    continue  # (we have only one block per CPU)

                T_expansion = T_expansions.get(a1, a2)
                Theta_expansion = Theta_expansions.get(a1, a2)
                #P_expansion = P_expansions.get(a1, a2)
                nm1, nm2 = T_expansion.shape

                m1stop = min(m1start + nm1, m1max)
                m2stop = min(m2start + nm2, m2max)

                if m1stop <= 0 or m2stop <= 0:
                    continue

                m1start = max(m1start, 0)
                m2start = max(m2start, 0)
                J1start = max(0, M1start - M_a[a1])
                J2start = max(0, M2start - M_a[a2])
                M1stop = J1start + m1stop - m1start
                J2stop = J2start + m2stop - m2start

                dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
                dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

                disp_o = get_displacements(a1, a2,
                                           phicutoff_a[a1] + phicutoff_a[a2])
                for disp in disp_o:
                    disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                    disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    Fkin_v = 2.0 * (
                        dTdR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] *
                        rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop,
                                                           J2start:J2stop] *
                                      ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Fkin2_av[a1] += Fkin_v
                    Fkin2_av[a2] -= Fkin_v
                    Ftheta2_av[a1] -= Ftheta_v
                    Ftheta2_av[a2] += Ftheta_v

            Fkin_av = Fkin2_av
            Ftheta_av = Ftheta2_av
            self.timer.stop('Not so complicated loop')

            dHP_and_dSP_aauim = {}

            a2values = {}
            for (a2, a3) in atompairs:
                if not a3 in a2values:
                    a2values[a3] = []
                a2values[a3].append(a2)

            Fatom_av = np.zeros_like(F_av)
            Frho_av = np.zeros_like(F_av)
            self.timer.start('Complicated loop')
            for a1, a3 in atompairs:
                if a1 == a3:
                    # Functions reside on same atom, so their overlap
                    # does not change when atom is displaced
                    continue
                m1start = M_a[a1] - M1start
                if m1start >= blocksize1:
                    continue

                P_expansion = P_expansions.get(a1, a3)
                nm1 = P_expansion.shape[0]
                m1stop = min(m1start + nm1, m1max)
                if m1stop <= 0:
                    continue

                m1start = max(m1start, 0)
                J1start = max(0, M1start - M_a[a1])
                J1stop = J1start + m1stop - m1start

                disp_o = get_displacements(a1, a3,
                                           phicutoff_a[a1] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
                for disp in disp_o:
                    disp.evaluate_overlap(P_expansion, dPdR_qvmi)

                dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
                for a2 in a2values[a3]:
                    m2start = M_a[a2] - M2start
                    if m2start >= blocksize2:
                        continue

                    P_expansion2 = P_expansions.get(a2, a3)
                    nm2 = P_expansion2.shape[0]
                    m2stop = min(m2start + nm2, m2max)
                    if m2stop <= 0:
                        continue

                    disp_o = get_displacements(a2, a3,
                                               phicutoff_a[a2] + pcutoff_a[a3])
                    if len(disp_o) == 0:
                        continue

                    m2start = max(m2start, 0)
                    J2start = max(0, M2start - M_a[a2])
                    J2stop = J2start + m2stop - m2start

                    if (a2, a3) in dHP_and_dSP_aauim:
                        dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                    else:
                        P_qmi = P_expansion2.zeros((nq, ), dtype=dtype)
                        for disp in disp_o:
                            disp.evaluate_direct(P_expansion2, P_qmi)
                        P_qmi = P_qmi[:, J2start:J2stop].copy()
                        dH_sp = alldH_asp[a3]
                        dS_ii = self.setups[a3].dO_ii

                        dHP_uim = []
                        dSP_uim = []
                        for u, kpt in enumerate(self.kpt_u):
                            dH_ii = unpack(dH_sp[kpt.s])
                            dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                            # XXX only need nq of these
                            dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                            dHP_uim.append(dHP_im)
                            dSP_uim.append(dSP_im)
                            dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                    for u, kpt in enumerate(self.kpt_u):
                        rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                        ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                        dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                        dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                        Fatom_c = 2.0 * (dPdRdHP_vmm *
                                         rhoT_mm).real.sum(-1).sum(-1)
                        Frho_c = 2.0 * (dPdRdSP_vmm *
                                        ET_mm).real.sum(-1).sum(-1)
                        Fatom_av[a1] += Fatom_c
                        Fatom_av[a3] -= Fatom_c

                        Frho_av[a1] -= Frho_c
                        Frho_av[a3] += Frho_c

            self.timer.stop('Complicated loop')

        if not isblacs:
            # Potential contribution
            #
            #           -----      /  d Phi  (r)
            #  a         \        |        mu    ~
            # F += -2 Re  )       |   ---------- v (r)  Phi  (r) dr rho
            #            /        |     d R                nu          nu mu
            #           -----    /         a
            #        mu in a; nu
            #
            self.timer.start('Potential')
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_uMM[u], kpt.q)
            self.timer.stop('Potential')

            # Density matrix contribution from PAW correction
            #
            #           -----                        -----
            #  a         \      a                     \     b
            # F +=  2 Re  )    Z      E        - 2 Re  )   Z      E
            #            /      mu nu  nu mu          /     mu nu  nu mu
            #           -----                        -----
            #           mu nu                    b; mu in a; nu
            #
            # with
            #                  b*
            #         -----  dP
            #   b      \       i mu    b   b
            #  Z     =  )   -------- dS   P
            #   mu nu  /     dR        ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Paw correction')
            Frho_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                work_MM = np.zeros((mynao, nao), dtype)
                ZE_MM = None
                for b in my_atom_indices:
                    setup = self.setups[b]
                    dO_ii = np.asarray(setup.dO_ii, dtype)
                    dOP_iM = np.zeros((setup.ni, nao), dtype)
                    gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                    for v in range(3):
                        gemm(1.0, dOP_iM,
                             dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop], 0.0,
                             work_MM, 'n')
                        ZE_MM = (work_MM * ET_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ZE_MM[M1:M2].sum()
                            Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                            Frho_av[b, v] += dE  # the "mu nu" term
            del work_MM, ZE_MM
            self.timer.stop('Paw correction')

            # Atomic density contribution
            #            -----                         -----
            #  a          \     a                       \     b
            # F  += -2 Re  )   A      rho       + 2 Re   )   A      rho
            #             /     mu nu    nu mu          /     mu nu    nu mu
            #            -----                         -----
            #            mu nu                     b; mu in a; nu
            #
            #                  b*
            #         ----- d P
            #  b       \       i mu   b   b
            # A     =   )   ------- dH   P
            #  mu nu   /    d R       ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Atomic Hamiltonian force')
            Fatom_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                for b in my_atom_indices:
                    H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                      dtype)
                    HP_iM = gemmdot(
                        H_ii,
                        np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                    for v in range(3):
                        dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                        ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ArhoT_MM[M1:M2].sum()
                            Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                            Fatom_av[b, v] -= dE  # the "mu nu" term
            self.timer.stop('Atomic Hamiltonian force')

        F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
        self.timer.start('Wait for sum')
        ksl.orbital_comm.sum(F_av)
        if self.bd.comm.rank == 0:
            self.kd.comm.sum(F_av, 0)
        self.timer.stop('Wait for sum')
        self.timer.stop('LCAO forces')