Python TwoCenterIntegralCalculator примеры использования

Язык программирования: Python

Пространство имен/Пакет: gpaw.lcao.overlap

Примеров на hotexamples.com: 7

Python TwoCenterIntegralCalculator - 7 примеров найдено. Это лучшие примеры Python кода для gpaw.lcao.overlap.TwoCenterIntegralCalculator, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

TwoCenterIntegralCalculator(4)

calculate(2)

phaseclass(1)

Пример #1

Показать файл

Файл: electronphonon.py Проект: Xu-Kai/lotsofcoresbook2code

    def calculate_dP_aqvMi(self, wfs):
        """Overlap between LCAO basis functions and gradient of projectors.

        Only the gradient wrt the atomic positions in the reference cell is
        computed.

        """

        nao = wfs.setups.nao
        nq = len(wfs.ibzk_qc)
        atoms = [self.atoms[i] for i in self.indices]

        # Derivatives in reference cell
        dP_aqvMi = {}
        for atom, setup in zip(atoms, wfs.setups):
            a = atom.index
            dP_aqvMi[a] = np.zeros((nq, 3, nao, setup.ni), wfs.dtype)

        # Calculate overlap between basis function and gradient of projectors
        # NOTE: the derivative is calculated wrt the atomic position and not
        # the real-space coordinate
        calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
        expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqvMi)
        calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqvMi])

        # Extract derivatives in the reference unit cell
        # dP_aqvMi = {}
        # for atom in self.atoms:
        #     dP_aqvMi[atom.index] = dPall_aqvMi[atom.index]

        return dP_aqvMi

Пример #2

Показать файл

Файл: electronphonon.py Проект: robwarm/gpaw-symm

    def calculate_dP_aqvMi(self, wfs):
        """Overlap between LCAO basis functions and gradient of projectors.

        Only the gradient wrt the atomic positions in the reference cell is
        computed.

        """
        
        nao = wfs.setups.nao
        nq = len(wfs.ibzk_qc)
        atoms = [self.atoms[i] for i in self.indices]
        
        # Derivatives in reference cell
        dP_aqvMi = {}
        for atom, setup in zip(atoms, wfs.setups):
            a = atom.index
            dP_aqvMi[a] = np.zeros((nq, 3, nao, setup.ni), wfs.dtype)

        # Calculate overlap between basis function and gradient of projectors
        # NOTE: the derivative is calculated wrt the atomic position and not
        # the real-space coordinate
        calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
        expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqvMi)
        calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqvMi])

        # Extract derivatives in the reference unit cell
        # dP_aqvMi = {}
        # for atom in self.atoms:
        #     dP_aqvMi[atom.index] = dPall_aqvMi[atom.index]
            
        return dP_aqvMi

Пример #3

Показать файл

def get_tci_dP_aMix(spos_ac, wfs, q, *args, **kwargs):
    # container for spline expansions of basis function-projector pairs
    # (note to self: remember to conjugate/negate because of that)
    from gpaw.lcao.overlap import ManySiteDictionaryWrapper,\
        TwoCenterIntegralCalculator, NewTwoCenterIntegrals

    if not isinstance(wfs.tci, NewTwoCenterIntegrals):
        raise RuntimeError('Please remember --gpaw=usenewtci=True')

    dP_aqxMi = {}
    nao = wfs.setups.nao
    nq = len(wfs.ibzk_qc)
    for a, setup in enumerate(wfs.setups):
        dP_aqxMi[a] = np.zeros((nq, 3, nao, setup.ni), wfs.dtype)

    calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqxMi)
    calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqxMi])

    dP_aMix = {}
    for a in dP_aqxMi:
        dP_aMix[a] = dP_aqxMi[a].transpose(0, 2, 3, 1).copy()[q]  # XXX q
    return dP_aMix

Пример #4

Показать файл

Файл: el_ph.py Проект: eojons/gpaw-scme

def get_tci_dP_aMix(spos_ac, wfs, q, *args, **kwargs):
    # container for spline expansions of basis function-projector pairs
    # (note to self: remember to conjugate/negate because of that)
    from gpaw.lcao.overlap import ManySiteDictionaryWrapper,\
         TwoCenterIntegralCalculator, NewTwoCenterIntegrals

    if not isinstance(wfs.tci, NewTwoCenterIntegrals):
        raise RuntimeError('Please remember --gpaw=usenewtci=True')

    dP_aqxMi = {}
    nao = wfs.setups.nao
    nq = len(wfs.ibzk_qc)
    for a, setup in enumerate(wfs.setups):
        dP_aqxMi[a] = np.zeros((nq, 3, nao, setup.ni), wfs.dtype)
    
    calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqxMi)
    calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqxMi])

    dP_aMix = {}
    for a in dP_aqxMi:
        dP_aMix[a] = dP_aqxMi[a].transpose(0, 2, 3, 1).copy()[q] # XXX q
    return dP_aMix

Пример #5

Показать файл

Файл: newoverlap.py Проект: thonmaker/gpaw

def newoverlap(wfs, spos_ac):
    assert wfs.ksl.block_comm.size == wfs.gd.comm.size * wfs.bd.comm.size
    #even_part = EvenPartitioning(wfs.gd.comm, #wfs.ksl.block_comm,
    #                             len(wfs.atom_partition.rank_a))
    #atom_partition = even_part.as_atom_partition()
    # XXXXXXXXXXXXXXX
    atom_partition = wfs.atom_partition

    tci = wfs.tci

    gd = wfs.gd
    kd = wfs.kd
    nq = len(kd.ibzk_qc)

    # New neighbor list because we want it "both ways", heh.  Or do we?
    neighbors = NeighborList(tci.cutoff_a,
                             skin=0,
                             sorted=True,
                             self_interaction=True,
                             bothways=False)
    atoms = Atoms('X%d' % len(tci.cutoff_a), cell=gd.cell_cv, pbc=gd.pbc_c)
    atoms.set_scaled_positions(spos_ac)
    neighbors.update(atoms)

    # XXX
    pcutoff_a = []
    phicutoff_a = []
    for setup in wfs.setups:
        if setup.pt_j:
            pcutoff = max([pt.get_cutoff() for pt in setup.pt_j])
        else:
            pcutoff = 0.0
        if setup.phit_j:
            phicutoff = max([phit.get_cutoff() for phit in setup.phit_j])
        else:
            phicutoff = 0.0
        pcutoff_a.append(pcutoff)
        phicutoff_a.append(phicutoff)

    # Calculate the projector--basis function overlaps:
    #
    #    a1        ~a1
    #   P      = < p   | Phi   > ,
    #    i mu       i       mu
    #
    # i.e. projector is on a1 and basis function is on what we will call a2.

    overlapcalc = TwoCenterIntegralCalculator(wfs.kd.ibzk_qc, derivative=False)

    P_aaqim = {}  # keys: (a1, a2).  Values: matrix blocks
    dists_and_offsets = DistsAndOffsets(neighbors, spos_ac, gd.cell_cv)

    #ng = 2**extra_parameters.get('log2ng', 10)
    #transformer = FourierTransformer(rcmax, ng)
    #tsoc = TwoSiteOverlapCalculator(transformer)
    #msoc = ManySiteOverlapCalculator(tsoc, I_a, I_a)

    msoc = wfs.tci.msoc

    phit_Ij = [setup.phit_j for setup in tci.setups_I]
    l_Ij = []
    for phit_j in phit_Ij:
        l_Ij.append([phit.get_angular_momentum_number() for phit in phit_j])

    pt_l_Ij = [setup.l_j for setup in tci.setups_I]
    pt_Ij = [setup.pt_j for setup in tci.setups_I]
    phit_Ijq = msoc.transform(phit_Ij)
    pt_Ijq = msoc.transform(pt_Ij)

    #self.Theta_expansions = msoc.calculate_expansions(l_Ij, phit_Ijq,
    #                                                  l_Ij, phit_Ijq)
    #self.T_expansions = msoc.calculate_kinetic_expansions(l_Ij, phit_Ijq)
    P_expansions = msoc.calculate_expansions(pt_l_Ij, pt_Ijq, l_Ij, phit_Ijq)
    P_neighbors_a = {}

    for a1 in atom_partition.my_indices:
        for a2 in range(len(wfs.setups)):
            R_ca_and_offset_a = dists_and_offsets.get(a1, a2)
            if R_ca_and_offset_a is None:  # No overlap between a1 and a2
                continue

            maxdistance = pcutoff_a[a1] + phicutoff_a[a2]
            expansion = P_expansions.get(a1, a2)
            P_qim = expansion.zeros((nq, ), dtype=wfs.dtype)
            disp = None
            for R_c, offset in R_ca_and_offset_a:
                r = np.linalg.norm(R_c)
                if r > maxdistance:
                    continue

                # Below lines are meant to make use of symmetry.  Will not
                # be relevant for P.
                #remainder = (a1 + a2) % 2
                #if a1 < a2 and not remainder:
                #    continue
                # if a1 > a2 and remainder:
                #    continue

                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)
                disp = AtomicDisplacement(None, a1, a2, R_c, offset, phases)
                disp.evaluate_overlap(expansion, P_qim)

            if disp is not None:  # there was at least one non-zero overlap
                assert (a1, a2) not in P_aaqim
                P_aaqim[(a1, a2)] = P_qim
                P_neighbors_a.setdefault(a1, []).append(a2)

    return P_neighbors_a, P_aaqim

Пример #6

Показать файл

Файл: newoverlap.py Проект: ryancoleman/lotsofcoresbook2code

def newoverlap(wfs, spos_ac):
    assert wfs.ksl.block_comm.size == wfs.gd.comm.size * wfs.bd.comm.size
    even_part = EvenPartitioning(wfs.gd.comm, #wfs.ksl.block_comm,
                                 len(wfs.atom_partition.rank_a))
    atom_partition = even_part.as_atom_partition()
    
    tci = wfs.tci

    gd = wfs.gd
    kd = wfs.kd
    nq = len(kd.ibzk_qc)

    # New neighbor list because we want it "both ways", heh.  Or do we?
    neighbors = NeighborList(tci.cutoff_a, skin=0,
                             sorted=True, self_interaction=True, bothways=False)
    atoms = Atoms('X%d' % len(tci.cutoff_a), cell=gd.cell_cv, pbc=gd.pbc_c)
    atoms.set_scaled_positions(spos_ac)
    neighbors.update(atoms)
    
    # XXX
    pcutoff_a = []
    phicutoff_a = []
    for setup in wfs.setups:
        if setup.pt_j:
            pcutoff = max([pt.get_cutoff() for pt in setup.pt_j])
        else:
            pcutoff = 0.0
        if setup.phit_j:
            phicutoff = max([phit.get_cutoff() for phit in setup.phit_j])
        else:
            phicutoff = 0.0
        pcutoff_a.append(pcutoff)
        phicutoff_a.append(phicutoff)

    # Calculate the projector--basis function overlaps:
    #
    #    a1        ~a1
    #   P      = < p   | Phi   > ,
    #    i mu       i       mu
    #
    # i.e. projector is on a1 and basis function is on what we will call a2.

    overlapcalc = TwoCenterIntegralCalculator(wfs.kd.ibzk_qc,
                                              derivative=False)
    
    P_aaqim = {} # keys: (a1, a2).  Values: matrix blocks
    dists_and_offsets = DistsAndOffsets(neighbors, spos_ac, gd.cell_cv)

    #ng = 2**extra_parameters.get('log2ng', 10)
    #transformer = FourierTransformer(rcmax, ng)
    #tsoc = TwoSiteOverlapCalculator(transformer)
    #msoc = ManySiteOverlapCalculator(tsoc, I_a, I_a)

    msoc = wfs.tci.msoc

    phit_Ij = [setup.phit_j for setup in tci.setups_I]
    l_Ij = []
    for phit_j in phit_Ij:
        l_Ij.append([phit.get_angular_momentum_number()
                     for phit in phit_j])

    pt_l_Ij = [setup.l_j for setup in tci.setups_I]        
    pt_Ij = [setup.pt_j for setup in tci.setups_I]
    phit_Ijq = msoc.transform(phit_Ij)
    pt_Ijq = msoc.transform(pt_Ij)

    #self.Theta_expansions = msoc.calculate_expansions(l_Ij, phit_Ijq,
    #                                                  l_Ij, phit_Ijq)
    #self.T_expansions = msoc.calculate_kinetic_expansions(l_Ij, phit_Ijq)
    P_expansions = msoc.calculate_expansions(pt_l_Ij, pt_Ijq,
                                             l_Ij, phit_Ijq)
    P_neighbors_a = {}
    
    for a1 in atom_partition.my_indices:
        for a2 in range(len(wfs.setups)):
            R_ca_and_offset_a = dists_and_offsets.get(a1, a2)
            if R_ca_and_offset_a is None: # No overlap between a1 and a2
                continue

            maxdistance = pcutoff_a[a1] + phicutoff_a[a2]
            expansion = P_expansions.get(a1, a2)
            P_qim = expansion.zeros((nq,), dtype=wfs.dtype)
            disp = None
            for R_c, offset in R_ca_and_offset_a:
                r = np.linalg.norm(R_c)
                if r > maxdistance:
                    continue
                
                # Below lines are meant to make use of symmetry.  Will not
                # be relevant for P.
                #remainder = (a1 + a2) % 2
                #if a1 < a2 and not remainder:
                #    continue
                # if a1 > a2 and remainder:
                #    continue
                
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)
                disp = AtomicDisplacement(None, a1, a2, R_c, offset, phases)
                disp.evaluate_overlap(expansion, P_qim)
            
            if disp is not None: # there was at least one non-zero overlap
                assert (a1, a2) not in P_aaqim
                P_aaqim[(a1, a2)] = P_qim
                P_neighbors_a.setdefault(a1, []).append(a2)
    
    Pkeys = P_aaqim.keys()
    Pkeys.sort()

    def get_M1M2(a):
        M1 = wfs.setups.M_a[a]
        M2 = M1 + wfs.setups[a].nao
        return M1, M2
    
    oldstyle_P_aqMi = None
    if 0:#wfs.world.size == 1:
        oldstyle_P_aqMi = {}
        for a, setup in enumerate(wfs.setups):
            oldstyle_P_aqMi[a] = np.zeros((nq, wfs.setups.nao, setup.ni),
                                          dtype=wfs.dtype)
        print([(s.ni, s.nao) for s in wfs.setups])
        for a1, a2 in Pkeys:
            M1, M2 = get_M1M2(a2)
            Pconj_qmi = P_aaqim[(a1, a2)].transpose(0, 2, 1).conjugate()
            oldstyle_P_aqMi[a1][:, M1:M2, :] = Pconj_qmi
        
    # XXX mind distribution

    return P_neighbors_a, P_aaqim, oldstyle_P_aqMi

Пример #7

Показать файл

Файл: lcao.py Проект: Xu-Kai/lotsofcoresbook2code

    def calculate_forces(self, hamiltonian, F_av):
        self.timer.start('LCAO forces')

        spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
        ksl = self.ksl
        nao = ksl.nao
        mynao = ksl.mynao
        nq = len(self.kd.ibzk_qc)
        dtype = self.dtype
        tci = self.tci
        gd = self.gd
        bfs = self.basis_functions

        Mstart = ksl.Mstart
        Mstop = ksl.Mstop

        from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
        isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

        if not isblacs:
            self.timer.start('TCI derivative')
            dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dPdR_aqvMi = {}
            for a in self.basis_functions.my_atom_indices:
                ni = self.setups[a].ni
                dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
            tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                     dPdR_aqvMi)
            gd.comm.sum(dThetadR_qvMM)
            gd.comm.sum(dTdR_qvMM)
            self.timer.stop('TCI derivative')

            my_atom_indices = bfs.my_atom_indices
            atom_indices = bfs.atom_indices

            def _slices(indices):
                for a in indices:
                    M1 = bfs.M_a[a] - Mstart
                    M2 = M1 + self.setups[a].nao
                    if M2 > 0:
                        yield a, max(0, M1), M2

            def slices():
                return _slices(atom_indices)

            def my_slices():
                return _slices(my_atom_indices)

        #
        #         -----                    -----
        #          \    -1                  \    *
        # E      =  )  S     H    rho     =  )  c     eps  f  c
        #  mu nu   /    mu x  x z    z nu   /    n mu    n  n  n nu
        #         -----                    -----
        #          x z                       n
        #
        # We use the transpose of that matrix.  The first form is used
        # if rho is given, otherwise the coefficients are used.
        self.timer.start('Initial')

        rhoT_uMM = []
        ET_uMM = []

        if not isblacs:
            if self.kpt_u[0].rho_MM is None:
                self.timer.start('Get density matrix')
                for kpt in self.kpt_u:
                    rhoT_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n, kpt.C_nM)
                    rhoT_uMM.append(rhoT_MM)
                    ET_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n * kpt.eps_n, kpt.C_nM)
                    ET_uMM.append(ET_MM)

                    if hasattr(kpt, 'c_on'):
                        # XXX does this work with BLACS/non-BLACS/etc.?
                        assert self.bd.comm.size == 1
                        d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                        dtype=kpt.C_nM.dtype)
                        for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                            d_nn += ne * np.outer(c_n.conj(), c_n)
                        rhoT_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn, kpt.C_nM)
                        ET_MM += ksl.get_transposed_density_matrix_delta(\
                            d_nn * kpt.eps_n, kpt.C_nM)
                self.timer.stop('Get density matrix')
            else:
                rhoT_uMM = []
                ET_uMM = []
                for kpt in self.kpt_u:
                    H_MM = self.eigensolver.calculate_hamiltonian_matrix(\
                        hamiltonian, self, kpt)
                    tri2full(H_MM)
                    S_MM = kpt.S_MM.copy()
                    tri2full(S_MM)
                    ET_MM = np.linalg.solve(S_MM,
                                            gemmdot(H_MM,
                                                    kpt.rho_MM)).T.copy()
                    del S_MM, H_MM
                    rhoT_MM = kpt.rho_MM.T.copy()
                    rhoT_uMM.append(rhoT_MM)
                    ET_uMM.append(ET_MM)
        self.timer.stop('Initial')

        if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
            from gpaw.blacs import BlacsGrid, Redistributor

            def get_density_matrix(f_n, C_nM, redistributor):
                rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                               C_nM).conj()
                rho_mm = redistributor.redistribute(rho1_mm)
                return rho_mm

            pcutoff_a = [
                max([pt.get_cutoff() for pt in setup.pt_j])
                for setup in self.setups
            ]
            phicutoff_a = [
                max([phit.get_cutoff() for phit in setup.phit_j])
                for setup in self.setups
            ]

            # XXX should probably use bdsize x gdsize instead
            # That would be consistent with some existing grids
            grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                             self.bd.comm.size)

            blocksize1 = -(-nao // grid.nprow)
            blocksize2 = -(-nao // grid.npcol)
            # XXX what are rows and columns actually?
            desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

            rhoT_umm = []
            ET_umm = []
            redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                self.timer.start('Get density matrix')
                rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
                rhoT_umm.append(rhoT_mm)
                self.timer.stop('Get density matrix')

                self.timer.start('Potential')
                rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_mM, kpt.q)
                del rhoT_mM
                self.timer.stop('Potential')

            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                           redistributor)
                ET_umm.append(ET_mm)
            self.timer.stop('Get density matrix')

            M1start = blocksize1 * grid.myrow
            M2start = blocksize2 * grid.mycol

            M1stop = min(M1start + blocksize1, nao)
            M2stop = min(M2start + blocksize2, nao)

            m1max = M1stop - M1start
            m2max = M2stop - M2start

        if not isblacs:
            # Kinetic energy contribution
            #
            #           ----- d T
            #  a         \       mu nu
            # F += 2 Re   )   -------- rho
            #            /    d R         nu mu
            #           -----    mu nu
            #        mu in a; nu
            #
            Fkin_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] *
                                rhoT_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Fkin_av[a, :] += \
                        2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM

            # Density matrix contribution due to basis overlap
            #
            #            ----- d Theta
            #  a          \           mu nu
            # F  += -2 Re  )   ------------  E
            #             /        d R        nu mu
            #            -----        mu nu
            #         mu in a; nu
            #
            Ftheta_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dThetadRE_vMM = (dThetadR_qvMM[kpt.q] *
                                 ET_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Ftheta_av[a, :] += \
                        -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

        if isblacs:
            from gpaw.lcao.overlap import TwoCenterIntegralCalculator
            self.timer.start('Prepare TCI loop')
            M_a = bfs.M_a

            Fkin2_av = np.zeros_like(F_av)
            Ftheta2_av = np.zeros_like(F_av)

            cell_cv = tci.atoms.cell
            spos_ac = tci.atoms.get_scaled_positions() % 1.0

            overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                      derivative=False)

            # XXX this is not parallel *AT ALL*.
            self.timer.start('Get neighbors')
            nl = tci.atompairs.pairs.neighbors
            r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
            atompairs = r_and_offset_aao.keys()
            atompairs.sort()
            self.timer.stop('Get neighbors')

            T_expansions = tci.T_expansions
            Theta_expansions = tci.Theta_expansions
            P_expansions = tci.P_expansions
            nq = len(self.kd.ibzk_qc)

            dH_asp = hamiltonian.dH_asp

            self.timer.start('broadcast dH')
            alldH_asp = {}
            for a in range(len(self.setups)):
                gdrank = bfs.sphere_a[a].rank
                if gdrank == gd.rank:
                    dH_sp = dH_asp[a]
                else:
                    ni = self.setups[a].ni
                    dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                gd.comm.broadcast(dH_sp, gdrank)
                # okay, now everyone gets copies of dH_sp
                alldH_asp[a] = dH_sp
            self.timer.stop('broadcast dH')

            # This will get sort of hairy.  We need to account for some
            # three-center overlaps, such as:
            #
            #         a1
            #      Phi   ~a3    a3  ~a3     a2     a2,a1
            #   < ----  |p  > dH   <p   |Phi  > rho
            #      dR
            #
            # To this end we will loop over all pairs of atoms (a1, a3),
            # and then a sub-loop over (a3, a2).
            from gpaw.lcao.overlap import DerivativeAtomicDisplacement

            class Displacement(DerivativeAtomicDisplacement):
                def __init__(self, a1, a2, R_c, offset):
                    phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                    offset)
                    DerivativeAtomicDisplacement.__init__(
                        self, None, a1, a2, R_c, offset, phases)

            # Cache of Displacement objects with spherical harmonics with
            # evaluated spherical harmonics.
            disp_aao = {}

            def get_displacements(a1, a2, maxdistance):
                # XXX the way maxdistance is handled it can lead to
                # bad caching when different maxdistances are passed
                # to subsequent calls with same pair of atoms
                disp_o = disp_aao.get((a1, a2))
                if disp_o is None:
                    disp_o = []
                    for R_c, offset in r_and_offset_aao[(a1, a2)]:
                        if np.linalg.norm(R_c) > maxdistance:
                            continue
                        disp = Displacement(a1, a2, R_c, offset)
                        disp_o.append(disp)
                    disp_aao[(a1, a2)] = disp_o
                return [disp for disp in disp_o if disp.r < maxdistance]

            self.timer.stop('Prepare TCI loop')
            self.timer.start('Not so complicated loop')

            for (a1, a2) in atompairs:
                if a1 >= a2:
                    # Actually this leads to bad load balance.
                    # We should take a1 > a2 or a1 < a2 equally many times.
                    # Maybe decide which of these choices
                    # depending on whether a2 % 1 == 0
                    continue

                m1start = M_a[a1] - M1start
                m2start = M_a[a2] - M2start
                if m1start >= blocksize1 or m2start >= blocksize2:
                    continue  # (we have only one block per CPU)

                T_expansion = T_expansions.get(a1, a2)
                Theta_expansion = Theta_expansions.get(a1, a2)
                #P_expansion = P_expansions.get(a1, a2)
                nm1, nm2 = T_expansion.shape

                m1stop = min(m1start + nm1, m1max)
                m2stop = min(m2start + nm2, m2max)

                if m1stop <= 0 or m2stop <= 0:
                    continue

                m1start = max(m1start, 0)
                m2start = max(m2start, 0)
                J1start = max(0, M1start - M_a[a1])
                J2start = max(0, M2start - M_a[a2])
                M1stop = J1start + m1stop - m1start
                J2stop = J2start + m2stop - m2start

                dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
                dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

                disp_o = get_displacements(a1, a2,
                                           phicutoff_a[a1] + phicutoff_a[a2])
                for disp in disp_o:
                    disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                    disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    Fkin_v = 2.0 * (
                        dTdR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] *
                        rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop,
                                                           J2start:J2stop] *
                                      ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Fkin2_av[a1] += Fkin_v
                    Fkin2_av[a2] -= Fkin_v
                    Ftheta2_av[a1] -= Ftheta_v
                    Ftheta2_av[a2] += Ftheta_v

            Fkin_av = Fkin2_av
            Ftheta_av = Ftheta2_av
            self.timer.stop('Not so complicated loop')

            dHP_and_dSP_aauim = {}

            a2values = {}
            for (a2, a3) in atompairs:
                if not a3 in a2values:
                    a2values[a3] = []
                a2values[a3].append(a2)

            Fatom_av = np.zeros_like(F_av)
            Frho_av = np.zeros_like(F_av)
            self.timer.start('Complicated loop')
            for a1, a3 in atompairs:
                if a1 == a3:
                    # Functions reside on same atom, so their overlap
                    # does not change when atom is displaced
                    continue
                m1start = M_a[a1] - M1start
                if m1start >= blocksize1:
                    continue

                P_expansion = P_expansions.get(a1, a3)
                nm1 = P_expansion.shape[0]
                m1stop = min(m1start + nm1, m1max)
                if m1stop <= 0:
                    continue

                m1start = max(m1start, 0)
                J1start = max(0, M1start - M_a[a1])
                J1stop = J1start + m1stop - m1start

                disp_o = get_displacements(a1, a3,
                                           phicutoff_a[a1] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
                for disp in disp_o:
                    disp.evaluate_overlap(P_expansion, dPdR_qvmi)

                dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
                for a2 in a2values[a3]:
                    m2start = M_a[a2] - M2start
                    if m2start >= blocksize2:
                        continue

                    P_expansion2 = P_expansions.get(a2, a3)
                    nm2 = P_expansion2.shape[0]
                    m2stop = min(m2start + nm2, m2max)
                    if m2stop <= 0:
                        continue

                    disp_o = get_displacements(a2, a3,
                                               phicutoff_a[a2] + pcutoff_a[a3])
                    if len(disp_o) == 0:
                        continue

                    m2start = max(m2start, 0)
                    J2start = max(0, M2start - M_a[a2])
                    J2stop = J2start + m2stop - m2start

                    if (a2, a3) in dHP_and_dSP_aauim:
                        dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                    else:
                        P_qmi = P_expansion2.zeros((nq, ), dtype=dtype)
                        for disp in disp_o:
                            disp.evaluate_direct(P_expansion2, P_qmi)
                        P_qmi = P_qmi[:, J2start:J2stop].copy()
                        dH_sp = alldH_asp[a3]
                        dS_ii = self.setups[a3].dO_ii

                        dHP_uim = []
                        dSP_uim = []
                        for u, kpt in enumerate(self.kpt_u):
                            dH_ii = unpack(dH_sp[kpt.s])
                            dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                            # XXX only need nq of these
                            dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                            dHP_uim.append(dHP_im)
                            dSP_uim.append(dSP_im)
                            dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                    for u, kpt in enumerate(self.kpt_u):
                        rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                        ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                        dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                        dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                        Fatom_c = 2.0 * (dPdRdHP_vmm *
                                         rhoT_mm).real.sum(-1).sum(-1)
                        Frho_c = 2.0 * (dPdRdSP_vmm *
                                        ET_mm).real.sum(-1).sum(-1)
                        Fatom_av[a1] += Fatom_c
                        Fatom_av[a3] -= Fatom_c

                        Frho_av[a1] -= Frho_c
                        Frho_av[a3] += Frho_c

            self.timer.stop('Complicated loop')

        if not isblacs:
            # Potential contribution
            #
            #           -----      /  d Phi  (r)
            #  a         \        |        mu    ~
            # F += -2 Re  )       |   ---------- v (r)  Phi  (r) dr rho
            #            /        |     d R                nu          nu mu
            #           -----    /         a
            #        mu in a; nu
            #
            self.timer.start('Potential')
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_uMM[u], kpt.q)
            self.timer.stop('Potential')

            # Density matrix contribution from PAW correction
            #
            #           -----                        -----
            #  a         \      a                     \     b
            # F +=  2 Re  )    Z      E        - 2 Re  )   Z      E
            #            /      mu nu  nu mu          /     mu nu  nu mu
            #           -----                        -----
            #           mu nu                    b; mu in a; nu
            #
            # with
            #                  b*
            #         -----  dP
            #   b      \       i mu    b   b
            #  Z     =  )   -------- dS   P
            #   mu nu  /     dR        ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Paw correction')
            Frho_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                work_MM = np.zeros((mynao, nao), dtype)
                ZE_MM = None
                for b in my_atom_indices:
                    setup = self.setups[b]
                    dO_ii = np.asarray(setup.dO_ii, dtype)
                    dOP_iM = np.zeros((setup.ni, nao), dtype)
                    gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                    for v in range(3):
                        gemm(1.0, dOP_iM,
                             dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop], 0.0,
                             work_MM, 'n')
                        ZE_MM = (work_MM * ET_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ZE_MM[M1:M2].sum()
                            Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                            Frho_av[b, v] += dE  # the "mu nu" term
            del work_MM, ZE_MM
            self.timer.stop('Paw correction')

            # Atomic density contribution
            #            -----                         -----
            #  a          \     a                       \     b
            # F  += -2 Re  )   A      rho       + 2 Re   )   A      rho
            #             /     mu nu    nu mu          /     mu nu    nu mu
            #            -----                         -----
            #            mu nu                     b; mu in a; nu
            #
            #                  b*
            #         ----- d P
            #  b       \       i mu   b   b
            # A     =   )   ------- dH   P
            #  mu nu   /    d R       ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Atomic Hamiltonian force')
            Fatom_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                for b in my_atom_indices:
                    H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                      dtype)
                    HP_iM = gemmdot(
                        H_ii,
                        np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                    for v in range(3):
                        dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                        ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ArhoT_MM[M1:M2].sum()
                            Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                            Fatom_av[b, v] -= dE  # the "mu nu" term
            self.timer.stop('Atomic Hamiltonian force')

        F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
        self.timer.start('Wait for sum')
        ksl.orbital_comm.sum(F_av)
        if self.bd.comm.rank == 0:
            self.kd.comm.sum(F_av, 0)
        self.timer.stop('Wait for sum')
        self.timer.stop('LCAO forces')