def calculate_dP_aqvMi(self, wfs):
    """Overlap between LCAO basis functions and gradient of projectors.

    Only the gradient wrt the atomic positions in the reference cell is
    computed, i.e. only for the atoms listed in ``self.indices``.

    ``wfs`` must provide ``setups``, ``ibzk_qc``, ``dtype`` and ``tci``
    (with ``P_expansions`` and ``atompairs``).

    Returns a dictionary mapping the atom index ``a`` to an array of shape
    ``(nq, 3, nao, ni)``, where ``nq = len(wfs.ibzk_qc)``,
    ``nao = wfs.setups.nao`` and ``ni = wfs.setups[a].ni``.

    """
    nao = wfs.setups.nao
    nq = len(wfs.ibzk_qc)

    # Derivatives in reference cell.  The setup is looked up by the atom
    # index itself: pairing the selected atoms with ``wfs.setups``
    # positionally would pick the wrong setup (and hence the wrong number
    # of projectors) whenever ``self.indices`` is not simply 0, 1, 2, ...
    dP_aqvMi = {}
    for i in self.indices:
        a = self.atoms[i].index
        ni = wfs.setups[a].ni
        dP_aqvMi[a] = np.zeros((nq, 3, nao, ni), wfs.dtype)

    # Calculate overlap between basis function and gradient of projectors
    # NOTE: the derivative is calculated wrt the atomic position and not
    # the real-space coordinate
    calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqvMi)
    # The zero-initialised arrays are handed to the calculator as the
    # output containers and are returned afterwards.
    calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqvMi])

    return dP_aqvMi
def calculate_dP_aqvMi(self, wfs):
    """Overlap between LCAO basis functions and gradient of projectors.

    Only the gradient wrt the atomic positions in the reference cell is
    computed, i.e. only for the atoms listed in ``self.indices``.

    ``wfs`` must provide ``setups``, ``ibzk_qc``, ``dtype`` and ``tci``
    (with ``P_expansions`` and ``atompairs``).

    Returns a dictionary mapping the atom index ``a`` to an array of shape
    ``(nq, 3, nao, ni)``, where ``nq = len(wfs.ibzk_qc)``,
    ``nao = wfs.setups.nao`` and ``ni = wfs.setups[a].ni``.

    """
    nao = wfs.setups.nao
    nq = len(wfs.ibzk_qc)

    # Derivatives in reference cell.  The setup is looked up by the atom
    # index itself: pairing the selected atoms with ``wfs.setups``
    # positionally would pick the wrong setup (and hence the wrong number
    # of projectors) whenever ``self.indices`` is not simply 0, 1, 2, ...
    dP_aqvMi = {}
    for i in self.indices:
        a = self.atoms[i].index
        ni = wfs.setups[a].ni
        dP_aqvMi[a] = np.zeros((nq, 3, nao, ni), wfs.dtype)

    # Calculate overlap between basis function and gradient of projectors
    # NOTE: the derivative is calculated wrt the atomic position and not
    # the real-space coordinate
    calc = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    expansions = ManySiteDictionaryWrapper(wfs.tci.P_expansions, dP_aqvMi)
    # The zero-initialised arrays are handed to the calculator as the
    # output containers and are returned afterwards.
    calc.calculate(wfs.tci.atompairs, [expansions], [dP_aqvMi])

    return dP_aqvMi
def get_tci_dP_aMix(spos_ac, wfs, q, *args, **kwargs):
    """Return projector-derivative overlaps for k-point ``q``, per atom.

    The derivative two-center integrals are evaluated for all k-points in
    ``wfs.ibzk_qc``; the result for each atom is then reordered from
    ``(q, x, M, i)`` to ``(q, M, i, x)`` and indexed with ``q``.

    ``spos_ac`` and any extra positional/keyword arguments are accepted but
    not used.  Raises RuntimeError unless ``wfs.tci`` is a
    ``NewTwoCenterIntegrals`` instance.

    """
    from gpaw.lcao.overlap import (ManySiteDictionaryWrapper,
                                   TwoCenterIntegralCalculator,
                                   NewTwoCenterIntegrals)

    tci = wfs.tci
    if not isinstance(tci, NewTwoCenterIntegrals):
        raise RuntimeError('Please remember --gpaw=usenewtci=True')

    # Container for spline expansions of basis function-projector pairs
    # (note to self: remember to conjugate/negate because of that)
    nq = len(wfs.ibzk_qc)
    nao = wfs.setups.nao
    derivs_aqxMi = {}
    for a, setup in enumerate(wfs.setups):
        shape = (nq, 3, nao, setup.ni)
        derivs_aqxMi[a] = np.zeros(shape, wfs.dtype)

    integrals = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    wrapped = ManySiteDictionaryWrapper(tci.P_expansions, derivs_aqxMi)
    integrals.calculate(tci.atompairs, [wrapped], [derivs_aqxMi])

    result_aMix = {}
    for a, deriv_qxMi in derivs_aqxMi.items():
        deriv_qMix = deriv_qxMi.transpose(0, 2, 3, 1).copy()
        result_aMix[a] = deriv_qMix[q]  # XXX q
    return result_aMix
def get_tci_dP_aMix(spos_ac, wfs, q, *args, **kwargs):
    """Return projector-derivative overlaps for k-point ``q``, per atom.

    The derivative two-center integrals are evaluated for all k-points in
    ``wfs.ibzk_qc``; the result for each atom is then reordered from
    ``(q, x, M, i)`` to ``(q, M, i, x)`` and indexed with ``q``.

    ``spos_ac`` and any extra positional/keyword arguments are accepted but
    not used.  Raises RuntimeError unless ``wfs.tci`` is a
    ``NewTwoCenterIntegrals`` instance.

    """
    from gpaw.lcao.overlap import (ManySiteDictionaryWrapper,
                                   TwoCenterIntegralCalculator,
                                   NewTwoCenterIntegrals)

    tci = wfs.tci
    if not isinstance(tci, NewTwoCenterIntegrals):
        raise RuntimeError('Please remember --gpaw=usenewtci=True')

    # Container for spline expansions of basis function-projector pairs
    # (note to self: remember to conjugate/negate because of that)
    nq = len(wfs.ibzk_qc)
    nao = wfs.setups.nao
    derivs_aqxMi = {}
    for a, setup in enumerate(wfs.setups):
        shape = (nq, 3, nao, setup.ni)
        derivs_aqxMi[a] = np.zeros(shape, wfs.dtype)

    integrals = TwoCenterIntegralCalculator(wfs.ibzk_qc, derivative=True)
    wrapped = ManySiteDictionaryWrapper(tci.P_expansions, derivs_aqxMi)
    integrals.calculate(tci.atompairs, [wrapped], [derivs_aqxMi])

    result_aMix = {}
    for a, deriv_qxMi in derivs_aqxMi.items():
        deriv_qMix = deriv_qxMi.transpose(0, 2, 3, 1).copy()
        result_aMix[a] = deriv_qMix[q]  # XXX q
    return result_aMix
def newoverlap(wfs, spos_ac):
    """Compute projector--basis function overlap blocks for local atoms.

    For every atom ``a1`` in ``wfs.atom_partition.my_indices`` and every
    atom ``a2``, the overlaps

        P^{a1}_{i mu} = < p~^{a1}_i | Phi_mu >

    are accumulated over all displacements between the two atoms that lie
    within the sum of the projector cutoff of ``a1`` and the basis function
    cutoff of ``a2`` (projector on a1, basis function on a2).

    Returns the tuple ``(P_neighbors_a, P_aaqim)``:

    * ``P_neighbors_a`` maps ``a1`` to the list of atoms ``a2`` for which
      at least one displacement was evaluated.
    * ``P_aaqim`` maps ``(a1, a2)`` to the corresponding matrix block,
      allocated with a leading dimension of ``len(wfs.kd.ibzk_qc)``.

    """
    assert wfs.ksl.block_comm.size == wfs.gd.comm.size * wfs.bd.comm.size

    tci = wfs.tci
    gd = wfs.gd
    nq = len(wfs.kd.ibzk_qc)
    local_atoms = wfs.atom_partition  # XXX even partitioning is disabled

    # New neighbor list because we want it "both ways", heh.  Or do we?
    cutoff_a = tci.cutoff_a
    nl = NeighborList(cutoff_a, skin=0, sorted=True,
                      self_interaction=True, bothways=False)
    dummy_atoms = Atoms('X%d' % len(cutoff_a), cell=gd.cell_cv,
                        pbc=gd.pbc_c)
    dummy_atoms.set_scaled_positions(spos_ac)
    nl.update(dummy_atoms)

    # Largest projector / basis function cutoff per atom; 0.0 when the
    # setup has no such functions.
    pcutoff_a = []
    phicutoff_a = []
    for setup in wfs.setups:
        pts = setup.pt_j
        phits = setup.phit_j
        pcutoff_a.append(
            max([pt.get_cutoff() for pt in pts]) if pts else 0.0)
        phicutoff_a.append(
            max([phit.get_cutoff() for phit in phits]) if phits else 0.0)

    overlapcalc = TwoCenterIntegralCalculator(wfs.kd.ibzk_qc,
                                              derivative=False)
    displacements = DistsAndOffsets(nl, spos_ac, gd.cell_cv)

    # Build the projector--basis function expansions from the splines of
    # the setups known to the two-center integral object.
    msoc = tci.msoc
    setups_I = tci.setups_I
    phit_Ij = [setup.phit_j for setup in setups_I]
    phit_l_Ij = [[phit.get_angular_momentum_number() for phit in phit_j]
                 for phit_j in phit_Ij]
    pt_l_Ij = [setup.l_j for setup in setups_I]
    pt_Ij = [setup.pt_j for setup in setups_I]
    phit_Ijq = msoc.transform(phit_Ij)
    pt_Ijq = msoc.transform(pt_Ij)
    P_expansions = msoc.calculate_expansions(pt_l_Ij, pt_Ijq,
                                             phit_l_Ij, phit_Ijq)

    P_aaqim = {}  # keys: (a1, a2).  Values: matrix blocks
    P_neighbors_a = {}
    for a1 in local_atoms.my_indices:
        for a2 in range(len(wfs.setups)):
            pair_displacements = displacements.get(a1, a2)
            if pair_displacements is None:
                continue  # No overlap between a1 and a2

            reach = pcutoff_a[a1] + phicutoff_a[a2]
            expansion = P_expansions.get(a1, a2)
            block_qim = expansion.zeros((nq,), dtype=wfs.dtype)

            nevaluated = 0
            for R_c, offset in pair_displacements:
                if np.linalg.norm(R_c) > reach:
                    continue
                # (Symmetry is deliberately not exploited; it is not
                # relevant for P.)
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)
                displacement = AtomicDisplacement(None, a1, a2, R_c, offset,
                                                  phases)
                displacement.evaluate_overlap(expansion, block_qim)
                nevaluated += 1

            if nevaluated:  # there was at least one non-zero overlap
                key = (a1, a2)
                assert key not in P_aaqim
                P_aaqim[key] = block_qim
                P_neighbors_a.setdefault(a1, []).append(a2)

    return P_neighbors_a, P_aaqim
def newoverlap(wfs, spos_ac):
    """Compute projector--basis function overlap blocks for local atoms.

    The atoms are divided with an ``EvenPartitioning`` over ``wfs.gd.comm``
    and each rank handles the atoms ``a1`` in its own part.  For each such
    ``a1`` and every atom ``a2`` the overlaps

        P^{a1}_{i mu} = < p~^{a1}_i | Phi_mu >

    are accumulated over all displacements between the two atoms that lie
    within the sum of the projector cutoff of ``a1`` and the basis function
    cutoff of ``a2`` (projector on a1, basis function on a2).

    Returns the tuple ``(P_neighbors_a, P_aaqim, oldstyle_P_aqMi)``:

    * ``P_neighbors_a`` maps ``a1`` to the list of atoms ``a2`` for which
      at least one displacement was evaluated.
    * ``P_aaqim`` maps ``(a1, a2)`` to the corresponding matrix block,
      allocated with a leading dimension of ``len(wfs.kd.ibzk_qc)``.
    * ``oldstyle_P_aqMi`` is always ``None``: the conversion to the old
      per-atom layout is switched off.

    """
    assert wfs.ksl.block_comm.size == wfs.gd.comm.size * wfs.bd.comm.size
    even_part = EvenPartitioning(wfs.gd.comm,  # wfs.ksl.block_comm,
                                 len(wfs.atom_partition.rank_a))
    atom_partition = even_part.as_atom_partition()

    tci = wfs.tci
    gd = wfs.gd
    kd = wfs.kd
    nq = len(kd.ibzk_qc)

    # New neighbor list because we want it "both ways", heh.  Or do we?
    neighbors = NeighborList(tci.cutoff_a, skin=0, sorted=True,
                             self_interaction=True, bothways=False)
    atoms = Atoms('X%d' % len(tci.cutoff_a), cell=gd.cell_cv, pbc=gd.pbc_c)
    atoms.set_scaled_positions(spos_ac)
    neighbors.update(atoms)

    # XXX
    # Largest projector / basis function cutoff per atom; 0.0 when the
    # setup has no such functions.
    pcutoff_a = []
    phicutoff_a = []
    for setup in wfs.setups:
        if setup.pt_j:
            pcutoff = max([pt.get_cutoff() for pt in setup.pt_j])
        else:
            pcutoff = 0.0
        if setup.phit_j:
            phicutoff = max([phit.get_cutoff() for phit in setup.phit_j])
        else:
            phicutoff = 0.0
        pcutoff_a.append(pcutoff)
        phicutoff_a.append(phicutoff)

    overlapcalc = TwoCenterIntegralCalculator(wfs.kd.ibzk_qc,
                                              derivative=False)

    P_aaqim = {}  # keys: (a1, a2).  Values: matrix blocks
    dists_and_offsets = DistsAndOffsets(neighbors, spos_ac, gd.cell_cv)

    # Build the projector--basis function expansions from the splines of
    # the setups known to the two-center integral object.
    msoc = wfs.tci.msoc
    phit_Ij = [setup.phit_j for setup in tci.setups_I]
    l_Ij = []
    for phit_j in phit_Ij:
        l_Ij.append([phit.get_angular_momentum_number()
                     for phit in phit_j])
    pt_l_Ij = [setup.l_j for setup in tci.setups_I]
    pt_Ij = [setup.pt_j for setup in tci.setups_I]
    phit_Ijq = msoc.transform(phit_Ij)
    pt_Ijq = msoc.transform(pt_Ij)
    P_expansions = msoc.calculate_expansions(pt_l_Ij, pt_Ijq,
                                             l_Ij, phit_Ijq)

    P_neighbors_a = {}
    for a1 in atom_partition.my_indices:
        for a2 in range(len(wfs.setups)):
            R_ca_and_offset_a = dists_and_offsets.get(a1, a2)
            if R_ca_and_offset_a is None:  # No overlap between a1 and a2
                continue

            maxdistance = pcutoff_a[a1] + phicutoff_a[a2]
            expansion = P_expansions.get(a1, a2)
            P_qim = expansion.zeros((nq,), dtype=wfs.dtype)
            disp = None
            for R_c, offset in R_ca_and_offset_a:
                r = np.linalg.norm(R_c)
                if r > maxdistance:
                    continue
                # (Symmetry is deliberately not exploited; it is not
                # relevant for P.)
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc, offset)
                disp = AtomicDisplacement(None, a1, a2, R_c, offset, phases)
                disp.evaluate_overlap(expansion, P_qim)

            if disp is not None:  # there was at least one non-zero overlap
                assert (a1, a2) not in P_aaqim
                P_aaqim[(a1, a2)] = P_qim
                P_neighbors_a.setdefault(a1, []).append(a2)

    # Conversion to the old layout (one (nq, nao, ni) array per atom).
    # It is disabled; the condition used to be ``wfs.world.size == 1``.
    oldstyle_P_aqMi = None
    build_oldstyle = False
    if build_oldstyle:
        oldstyle_P_aqMi = {}
        for a, setup in enumerate(wfs.setups):
            oldstyle_P_aqMi[a] = np.zeros((nq, wfs.setups.nao, setup.ni),
                                          dtype=wfs.dtype)
        # sorted() works on both Python 2 and 3, unlike sorting the
        # object returned by dict.keys() in place.
        for a1, a2 in sorted(P_aaqim):
            M1 = wfs.setups.M_a[a2]
            M2 = M1 + wfs.setups[a2].nao
            Pconj_qmi = P_aaqim[(a1, a2)].transpose(0, 2, 1).conjugate()
            oldstyle_P_aqMi[a1][:, M1:M2, :] = Pconj_qmi

    # XXX mind distribution
    return P_neighbors_a, P_aaqim, oldstyle_P_aqMi
def calculate_forces(self, hamiltonian, F_av):
    """Add the LCAO force contributions to ``F_av`` in place.

    Five terms are accumulated and added to ``F_av``: kinetic
    (``Fkin_av``), local potential (``Fpot_av``), basis overlap
    (``Ftheta_av``), PAW overlap correction (``Frho_av``) and atomic
    Hamiltonian (``Fatom_av``).  ``F_av`` is indexed as
    ``[atom, cartesian component]``.

    Two code paths exist, selected by whether ``self.ksl`` is a
    ``BlacsOrbitalLayouts`` instance:

    * non-BLACS: derivative matrices are obtained from
      ``self.tci.calculate_derivative`` and contracted with the transposed
      density matrices.
    * BLACS: density matrices are redistributed onto a BLACS grid with one
      block per rank and the derivatives are evaluated atom pair by atom
      pair.

    ``hamiltonian`` must provide ``vt_sG`` and ``dH_asp``.  After the local
    accumulation ``F_av`` is summed over ``self.ksl.orbital_comm`` and, on
    band-communicator rank 0, over ``self.kd.comm``.  Nothing is returned.

    """
    self.timer.start('LCAO forces')

    spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
    ksl = self.ksl
    nao = ksl.nao
    mynao = ksl.mynao
    nq = len(self.kd.ibzk_qc)
    dtype = self.dtype
    tci = self.tci
    gd = self.gd
    bfs = self.basis_functions

    Mstart = ksl.Mstart
    Mstop = ksl.Mstop

    from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
    isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

    if not isblacs:
        self.timer.start('TCI derivative')
        dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
        dPdR_aqvMi = {}
        for a in self.basis_functions.my_atom_indices:
            ni = self.setups[a].ni
            dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
        tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                 dPdR_aqvMi)
        gd.comm.sum(dThetadR_qvMM)
        gd.comm.sum(dTdR_qvMM)
        self.timer.stop('TCI derivative')

        my_atom_indices = bfs.my_atom_indices
        atom_indices = bfs.atom_indices

        def _slices(indices):
            # Yield (a, M1, M2): the basis function range of atom a
            # relative to Mstart, with M1 clipped at 0.  Atoms whose
            # range ends at or before Mstart are skipped.
            for a in indices:
                M1 = bfs.M_a[a] - Mstart
                M2 = M1 + self.setups[a].nao
                if M2 > 0:
                    yield a, max(0, M1), M2

        def slices():
            return _slices(atom_indices)

        def my_slices():
            return _slices(my_atom_indices)

    # E_{mu nu} = sum_{x z} S^-1_{mu x} H_{x z} rho_{z nu}
    #           = sum_n c*_{n mu} eps_n f_n c_{n nu}
    #
    # We use the transpose of that matrix.  The first form is used
    # if rho is given, otherwise the coefficients are used.
    self.timer.start('Initial')

    rhoT_uMM = []
    ET_uMM = []

    if not isblacs:
        if self.kpt_u[0].rho_MM is None:
            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                rhoT_MM = ksl.get_transposed_density_matrix(kpt.f_n,
                                                            kpt.C_nM)
                rhoT_uMM.append(rhoT_MM)
                ET_MM = ksl.get_transposed_density_matrix(
                    kpt.f_n * kpt.eps_n, kpt.C_nM)
                ET_uMM.append(ET_MM)

                if hasattr(kpt, 'c_on'):
                    # XXX does this work with BLACS/non-BLACS/etc.?
                    assert self.bd.comm.size == 1
                    d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                    dtype=kpt.C_nM.dtype)
                    for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                        d_nn += ne * np.outer(c_n.conj(), c_n)
                    # In-place updates of the arrays appended above.
                    rhoT_MM += ksl.get_transposed_density_matrix_delta(
                        d_nn, kpt.C_nM)
                    ET_MM += ksl.get_transposed_density_matrix_delta(
                        d_nn * kpt.eps_n, kpt.C_nM)
            self.timer.stop('Get density matrix')
        else:
            for kpt in self.kpt_u:
                H_MM = self.eigensolver.calculate_hamiltonian_matrix(
                    hamiltonian, self, kpt)
                tri2full(H_MM)
                S_MM = kpt.S_MM.copy()
                tri2full(S_MM)
                ET_MM = np.linalg.solve(
                    S_MM, gemmdot(H_MM, kpt.rho_MM)).T.copy()
                del S_MM, H_MM
                rhoT_MM = kpt.rho_MM.T.copy()
                rhoT_uMM.append(rhoT_MM)
                ET_uMM.append(ET_MM)
    self.timer.stop('Initial')

    if isblacs:  # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
        from gpaw.blacs import BlacsGrid, Redistributor

        def get_density_matrix(f_n, C_nM, redistributor):
            rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                           C_nM).conj()
            rho_mm = redistributor.redistribute(rho1_mm)
            return rho_mm

        pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j])
                     for setup in self.setups]
        phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j])
                       for setup in self.setups]

        # XXX should probably use bdsize x gdsize instead
        # That would be consistent with some existing grids
        grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                         self.bd.comm.size)

        # Ceiling division: one block per process row / column.
        blocksize1 = -(-nao // grid.nprow)
        blocksize2 = -(-nao // grid.npcol)
        # XXX what are rows and columns actually?
        desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

        rhoT_umm = []
        ET_umm = []
        redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            self.timer.start('Get density matrix')
            rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
            rhoT_umm.append(rhoT_mm)
            self.timer.stop('Get density matrix')

            self.timer.start('Potential')
            rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_mM,
                                                        kpt.q)
            del rhoT_mM
            self.timer.stop('Potential')

        self.timer.start('Get density matrix')
        for kpt in self.kpt_u:
            ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                       redistributor)
            ET_umm.append(ET_mm)
        self.timer.stop('Get density matrix')

        # Global index range [M1start, M1stop) x [M2start, M2stop) of the
        # block held by this rank, and its local extent.
        M1start = blocksize1 * grid.myrow
        M2start = blocksize2 * grid.mycol

        M1stop = min(M1start + blocksize1, nao)
        M2stop = min(M2start + blocksize2, nao)

        m1max = M1stop - M1start
        m2max = M2stop - M2start

    if not isblacs:
        # Kinetic energy contribution
        #
        # F^a += 2 Re sum_{mu in a; nu} (dT_{mu nu} / dR_{mu nu}) rho_{nu mu}
        #
        Fkin_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dEdTrhoT_vMM = (dTdR_qvMM[kpt.q]
                            * rhoT_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Fkin_av[a, :] += \
                    2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM

        # Density matrix contribution due to basis overlap
        #
        # F^a += -2 Re sum_{mu in a; nu}
        #            (dTheta_{mu nu} / dR_{mu nu}) E_{nu mu}
        #
        Ftheta_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            dThetadRE_vMM = (dThetadR_qvMM[kpt.q]
                             * ET_uMM[u][np.newaxis]).real
            for a, M1, M2 in my_slices():
                Ftheta_av[a, :] += \
                    -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

    if isblacs:
        from gpaw.lcao.overlap import TwoCenterIntegralCalculator
        self.timer.start('Prepare TCI loop')
        M_a = bfs.M_a

        Fkin2_av = np.zeros_like(F_av)
        Ftheta2_av = np.zeros_like(F_av)

        cell_cv = tci.atoms.cell
        spos_ac = tci.atoms.get_scaled_positions() % 1.0

        overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                  derivative=False)

        # XXX this is not parallel *AT ALL*.
        self.timer.start('Get neighbors')
        nl = tci.atompairs.pairs.neighbors
        r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
        # sorted() instead of keys() + in-place sort(): the latter only
        # works where keys() returns a list (Python 2).
        atompairs = sorted(r_and_offset_aao.keys())
        self.timer.stop('Get neighbors')

        T_expansions = tci.T_expansions
        Theta_expansions = tci.Theta_expansions
        P_expansions = tci.P_expansions

        dH_asp = hamiltonian.dH_asp

        self.timer.start('broadcast dH')
        alldH_asp = {}
        for a in range(len(self.setups)):
            gdrank = bfs.sphere_a[a].rank
            if gdrank == gd.rank:
                dH_sp = dH_asp[a]
            else:
                ni = self.setups[a].ni
                dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
            gd.comm.broadcast(dH_sp, gdrank)
            # okay, now everyone gets copies of dH_sp
            alldH_asp[a] = dH_sp
        self.timer.stop('broadcast dH')

        # This will get sort of hairy.  We need to account for some
        # three-center overlaps, such as:
        #
        #   < dPhi^{a1}/dR | p~^{a3} > dH^{a3} < p~^{a3} | Phi^{a2} >
        #       * rho^{a2,a1}
        #
        # To this end we will loop over all pairs of atoms (a1, a3),
        # and then a sub-loop over (a3, a2).
        from gpaw.lcao.overlap import DerivativeAtomicDisplacement

        class Displacement(DerivativeAtomicDisplacement):
            def __init__(self, a1, a2, R_c, offset):
                phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                offset)
                DerivativeAtomicDisplacement.__init__(
                    self, None, a1, a2, R_c, offset, phases)

        # Cache of Displacement objects with evaluated spherical
        # harmonics.
        disp_aao = {}

        def get_displacements(a1, a2, maxdistance):
            # XXX the way maxdistance is handled it can lead to
            # bad caching when different maxdistances are passed
            # to subsequent calls with same pair of atoms
            disp_o = disp_aao.get((a1, a2))
            if disp_o is None:
                disp_o = []
                for R_c, offset in r_and_offset_aao[(a1, a2)]:
                    if np.linalg.norm(R_c) > maxdistance:
                        continue
                    disp = Displacement(a1, a2, R_c, offset)
                    disp_o.append(disp)
                disp_aao[(a1, a2)] = disp_o
            return [disp for disp in disp_o if disp.r < maxdistance]

        self.timer.stop('Prepare TCI loop')
        self.timer.start('Not so complicated loop')

        for (a1, a2) in atompairs:
            if a1 >= a2:
                # Actually this leads to bad load balance.
                # We should take a1 > a2 or a1 < a2 equally many times.
                # Maybe decide which of these choices
                # depending on whether a2 % 1 == 0
                continue

            m1start = M_a[a1] - M1start
            m2start = M_a[a2] - M2start
            if m1start >= blocksize1 or m2start >= blocksize2:
                continue  # (we have only one block per CPU)

            T_expansion = T_expansions.get(a1, a2)
            Theta_expansion = Theta_expansions.get(a1, a2)
            nm1, nm2 = T_expansion.shape

            m1stop = min(m1start + nm1, m1max)
            m2stop = min(m2start + nm2, m2max)

            if m1stop <= 0 or m2stop <= 0:
                continue

            # m* index the local block of the density matrices, J* index
            # the atom-pair block of the expansion.
            m1start = max(m1start, 0)
            m2start = max(m2start, 0)
            J1start = max(0, M1start - M_a[a1])
            J2start = max(0, M2start - M_a[a2])
            # Named J1stop (not M1stop) so that the bound of the local
            # block computed above is not overwritten.
            J1stop = J1start + m1stop - m1start
            J2stop = J2start + m2stop - m2start

            dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
            dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

            disp_o = get_displacements(a1, a2,
                                       phicutoff_a[a1] + phicutoff_a[a2])
            for disp in disp_o:
                disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

            for u, kpt in enumerate(self.kpt_u):
                rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                dTdR_vmm = dTdR_qvmm[kpt.q][:, J1start:J1stop,
                                            J2start:J2stop]
                dThetadR_vmm = dThetadR_qvmm[kpt.q][:, J1start:J1stop,
                                                    J2start:J2stop]
                Fkin_v = 2.0 * (dTdR_vmm
                                * rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                Ftheta_v = 2.0 * (dThetadR_vmm
                                  * ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                Fkin2_av[a1] += Fkin_v
                Fkin2_av[a2] -= Fkin_v
                Ftheta2_av[a1] -= Ftheta_v
                Ftheta2_av[a2] += Ftheta_v

        Fkin_av = Fkin2_av
        Ftheta_av = Ftheta2_av
        self.timer.stop('Not so complicated loop')

        # Cache of (dHP_uim, dSP_uim) per pair (a2, a3).
        dHP_and_dSP_aauim = {}

        # For each atom a3, the atoms a2 that form a pair (a2, a3).
        a2values = {}
        for (a2, a3) in atompairs:
            a2values.setdefault(a3, []).append(a2)

        Fatom_av = np.zeros_like(F_av)
        Frho_av = np.zeros_like(F_av)
        self.timer.start('Complicated loop')
        for a1, a3 in atompairs:
            if a1 == a3:
                # Functions reside on same atom, so their overlap
                # does not change when atom is displaced
                continue
            m1start = M_a[a1] - M1start
            if m1start >= blocksize1:
                continue

            P_expansion = P_expansions.get(a1, a3)
            nm1 = P_expansion.shape[0]
            m1stop = min(m1start + nm1, m1max)
            if m1stop <= 0:
                continue

            m1start = max(m1start, 0)
            J1start = max(0, M1start - M_a[a1])
            J1stop = J1start + m1stop - m1start

            disp_o = get_displacements(a1, a3,
                                       phicutoff_a[a1] + pcutoff_a[a3])
            if not disp_o:
                continue

            dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
            for disp in disp_o:
                disp.evaluate_overlap(P_expansion, dPdR_qvmi)

            dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
            for a2 in a2values[a3]:
                m2start = M_a[a2] - M2start
                if m2start >= blocksize2:
                    continue

                P_expansion2 = P_expansions.get(a2, a3)
                nm2 = P_expansion2.shape[0]
                m2stop = min(m2start + nm2, m2max)
                if m2stop <= 0:
                    continue

                disp_o = get_displacements(a2, a3,
                                           phicutoff_a[a2] + pcutoff_a[a3])
                if not disp_o:
                    continue

                m2start = max(m2start, 0)
                J2start = max(0, M2start - M_a[a2])
                J2stop = J2start + m2stop - m2start

                if (a2, a3) in dHP_and_dSP_aauim:
                    dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                else:
                    P_qmi = P_expansion2.zeros((nq,), dtype=dtype)
                    for disp in disp_o:
                        disp.evaluate_direct(P_expansion2, P_qmi)
                    P_qmi = P_qmi[:, J2start:J2stop].copy()
                    dH_sp = alldH_asp[a3]
                    dS_ii = self.setups[a3].dO_ii

                    dHP_uim = []
                    dSP_uim = []
                    for u, kpt in enumerate(self.kpt_u):
                        dH_ii = unpack(dH_sp[kpt.s])
                        dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                        # XXX only need nq of these
                        dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                        dHP_uim.append(dHP_im)
                        dSP_uim.append(dSP_im)
                    dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                    dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                    Fatom_c = 2.0 * (dPdRdHP_vmm
                                     * rhoT_mm).real.sum(-1).sum(-1)
                    Frho_c = 2.0 * (dPdRdSP_vmm
                                    * ET_mm).real.sum(-1).sum(-1)
                    Fatom_av[a1] += Fatom_c
                    Fatom_av[a3] -= Fatom_c

                    Frho_av[a1] -= Frho_c
                    Frho_av[a3] += Frho_c

        self.timer.stop('Complicated loop')

    if not isblacs:
        # Potential contribution
        #
        # F^a += -2 Re sum_{mu in a; nu}
        #            [ int (dPhi_mu(r) / dR_a) v~(r) Phi_nu(r) dr ]
        #            * rho_{nu mu}
        #
        self.timer.start('Potential')
        Fpot_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            vt_G = hamiltonian.vt_sG[kpt.s]
            Fpot_av += bfs.calculate_force_contribution(vt_G, rhoT_uMM[u],
                                                        kpt.q)
        self.timer.stop('Potential')

        # Density matrix contribution from PAW correction
        #
        # F^a +=   2 Re sum_{mu nu} Z^a_{mu nu} E_{nu mu}
        #        - 2 Re sum_{b; mu in a; nu} Z^b_{mu nu} E_{nu mu}
        #
        # with
        #
        # Z^b_{mu nu} = sum_{ij} (dP^{b*}_{i mu} / dR_{b mu})
        #                        dS^b_{ij} P^b_{j nu}
        #
        self.timer.start('Paw correction')
        Frho_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            work_MM = np.zeros((mynao, nao), dtype)
            ZE_MM = None
            for b in my_atom_indices:
                setup = self.setups[b]
                dO_ii = np.asarray(setup.dO_ii, dtype)
                dOP_iM = np.zeros((setup.ni, nao), dtype)
                gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                for v in range(3):
                    gemm(1.0, dOP_iM,
                         dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop],
                         0.0, work_MM, 'n')
                    ZE_MM = (work_MM * ET_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ZE_MM[M1:M2].sum()
                        Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                        Frho_av[b, v] += dE  # the "mu nu" term
            del work_MM, ZE_MM
        self.timer.stop('Paw correction')

        # Atomic density contribution
        #
        # F^a += - 2 Re sum_{mu nu} A^a_{mu nu} rho_{nu mu}
        #        + 2 Re sum_{b; mu in a; nu} A^b_{mu nu} rho_{nu mu}
        #
        # A^b_{mu nu} = sum_{ij} (dP^{b*}_{i mu} / dR_{b mu})
        #                        dH^b_{ij} P^b_{j nu}
        #
        self.timer.start('Atomic Hamiltonian force')
        Fatom_av = np.zeros_like(F_av)
        for u, kpt in enumerate(self.kpt_u):
            for b in my_atom_indices:
                H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                  dtype)
                HP_iM = gemmdot(
                    H_ii,
                    np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                for v in range(3):
                    dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                    ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM)
                                * rhoT_uMM[u]).real
                    for a, M1, M2 in slices():
                        dE = 2 * ArhoT_MM[M1:M2].sum()
                        Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                        Fatom_av[b, v] -= dE  # the "mu nu" term
        self.timer.stop('Atomic Hamiltonian force')

    F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
    self.timer.start('Wait for sum')
    ksl.orbital_comm.sum(F_av)
    if self.bd.comm.rank == 0:
        self.kd.comm.sum(F_av, 0)
    self.timer.stop('Wait for sum')
    self.timer.stop('LCAO forces')