def density_matrix(self, n, m, k, Gspace=True):
    """Return the pair density of band ``n`` at ``k`` and band ``m`` at ``kq_k[k]``.

    Parameters:

    n, m: int
        Band indices.  ``n`` is read at k-point ``k`` and ``m`` at the
        k-point ``self.kq_k[k]`` (presumably k+q; confirm against the
        code that fills ``kq_k``).
    k: int
        k-point index used to look up ``self.kd.kibz_k`` and ``self.kq_k``.
    Gspace: bool
        If False, return the two transformed wave functions
        ``(psit1_g, psit2_g)`` without further processing.  Otherwise
        return ``rho_G``, a complex array of length ``self.npw`` that
        includes the PAW correction.
    """
    gd = self.gd
    kd = self.kd
    kq = self.kq_k[k]

    ibzkpt1 = kd.kibz_k[k]
    ibzkpt2 = kd.kibz_k[kq]

    psit1_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt1, n, True), k)
    psit2_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt2, m, True), kq)

    # Only an explicit ``False`` selects the real-space shortcut; other
    # falsy values still take the G-space path, as before.
    if Gspace is False:
        return psit1_g, psit2_g

    # FFT
    tmp_g = psit1_g.conj() * psit2_g * self.expqr_g
    rho_g = np.fft.fftn(tmp_g) * self.vol / self.nG0

    # Here, planewave cutoff is applied
    rho_G = np.zeros(self.npw, dtype=complex)
    for iG in range(self.npw):
        index = self.Gindex_G[iG]
        rho_G[iG] = rho_g[index[0], index[1], index[2]]

    if self.optical_limit:
        # The first coefficient is replaced by a gradient matrix element
        # projected on self.qq_v.
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros(3, dtype=complex)
        phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                          self.bzk_kc[kq, :, np.newaxis])
        for ix in range(3):
            d_c[ix](psit2_g, dpsit_g, phase_cd)
            tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

    # PAW correction
    pt = self.pt
    P1_ai = pt.dict()
    pt.integrate(psit1_g, P1_ai, k)
    P2_ai = pt.dict()
    pt.integrate(psit2_g, P2_ai, kq)

    # Only the atom index is needed; the setup id itself is unused.
    for a, _ in enumerate(self.calc.wfs.setups.id_a):
        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

    if self.optical_limit:
        # NOTE(review): no guard against a zero energy difference here.
        rho_G[0] /= self.e_kn[ibzkpt2, m] - self.e_kn[ibzkpt1, n]

    return rho_G
def density_matrix(self, n, m, k, Gspace=True):
    """Return the pair density of band ``n`` at ``k`` and band ``m`` at ``kq_k[k]``.

    Parameters:

    n, m: int
        Band indices.  ``n`` is read at k-point ``k`` and ``m`` at the
        k-point ``self.kq_k[k]`` (presumably k+q; confirm against the
        code that fills ``kq_k``).
    k: int
        k-point index used to look up ``self.kd.kibz_k`` and ``self.kq_k``.
    Gspace: bool
        If False, return the two transformed wave functions
        ``(psit1_g, psit2_g)`` without further processing.  Otherwise
        return ``rho_G``, a complex array of length ``self.npw`` that
        includes the PAW correction.
    """
    gd = self.gd
    kd = self.kd
    kq = self.kq_k[k]

    ibzkpt1 = kd.kibz_k[k]
    ibzkpt2 = kd.kibz_k[kq]

    psit1_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt1, n, True), k)
    psit2_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt2, m, True), kq)

    # Only an explicit ``False`` selects the real-space shortcut; other
    # falsy values still take the G-space path, as before.
    if Gspace is False:
        return psit1_g, psit2_g

    # FFT
    tmp_g = psit1_g.conj() * psit2_g * self.expqr_g
    rho_g = np.fft.fftn(tmp_g) * self.vol / self.nG0

    # Here, planewave cutoff is applied
    rho_G = np.zeros(self.npw, dtype=complex)
    for iG in range(self.npw):
        index = self.Gindex_G[iG]
        rho_G[iG] = rho_g[index[0], index[1], index[2]]

    if self.optical_limit:
        # The first coefficient is replaced by a gradient matrix element
        # projected on self.qq_v.
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros(3, dtype=complex)
        phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                          self.bzk_kc[kq, :, np.newaxis])
        for ix in range(3):
            d_c[ix](psit2_g, dpsit_g, phase_cd)
            tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

    # PAW correction
    pt = self.pt
    P1_ai = pt.dict()
    pt.integrate(psit1_g, P1_ai, k)
    P2_ai = pt.dict()
    pt.integrate(psit2_g, P2_ai, kq)

    # Only the atom index is needed; the setup id itself is unused.
    for a, _ in enumerate(self.calc.wfs.setups.id_a):
        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

    if self.optical_limit:
        # NOTE(review): no guard against a zero energy difference here.
        rho_G[0] /= self.e_kn[ibzkpt2, m] - self.e_kn[ibzkpt1, n]

    return rho_G
def calculate(self, spin=0):
    """Calculate the non-interacting density response function.

    Loops over the k-points ``range(self.kstart, self.kend)`` and the band
    pairs (n, m).  For every pair whose occupation difference passes the
    ``self.ftol`` test, the pair density ``rho_G`` is built and the outer
    product ``rho_G rho_G^*`` is accumulated, either directly into
    ``chi0_wGG`` or, when ``self.hilbert_trans`` is set, into a spectral
    function that is Hilbert transformed afterwards.

    The result, divided by ``self.vol``, is stored in ``self.chi0_wGG``.
    If no occupation exceeds ``self.ftol`` a zero matrix is stored and the
    method returns immediately.  Nothing is returned.

    Parameters:

    spin: int
        Spin index forwarded to ``self.get_wavefunction``.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    bzk_kc = self.bzk_kc
    kq_k = self.kq_k
    pt = self.pt
    f_kn = self.f_kn
    e_kn = self.e_kn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if not (f_kn > self.ftol).any():
        self.chi0_wGG = chi0_wGG
        return

    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw),
                                dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros(3, dtype=complex)

    rho_G = np.zeros(self.npw, dtype=complex)
    # Bound up front so the cleanup below cannot fail when this rank never
    # processes a band pair (empty k range or no allowed transitions).
    rho_GG = None
    # Bound up front so the progress report cannot hit an unbound name when
    # index 0 is not handled by the rank that prints.
    totaltime = 0.0
    t0 = time()

    for k in range(self.kstart, self.kend):

        # Find corresponding kpoint in IBZ
        ibzkpt1 = kd.kibz_k[k]
        if self.optical_limit:
            ibzkpt2 = ibzkpt1
        else:
            ibzkpt2 = kd.kibz_k[kq_k[k]]

        for n in range(self.nstart, self.nend):
            psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)
            psit1new_g = kd.transform_wave_function(psitold_g, k)

            P1_ai = pt.dict()
            pt.integrate(psit1new_g, P1_ai, k)

            psit1_g = psit1new_g.conj() * self.expqr_g

            for m in range(self.nbands):
                df = f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]
                if self.hilbert_trans:
                    check_focc = df > self.ftol
                else:
                    check_focc = np.abs(df) > self.ftol

                # Called for every m, with check_focc passed through as the
                # third argument, exactly as before.
                psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc,
                                                  spin=spin)
                if not check_focc:
                    continue

                e1 = e_kn[ibzkpt1, n]
                e2 = e_kn[ibzkpt2, m]

                psit2_g = kd.transform_wave_function(psitold_g, kq_k[k])
                P2_ai = pt.dict()
                pt.integrate(psit2_g, P2_ai, kq_k[k])

                # fft
                tmp_g = np.fft.fftn(psit2_g * psit1_g) * self.vol / self.nG0

                for iG in range(self.npw):
                    index = self.Gindex_G[iG]
                    rho_G[iG] = tmp_g[index[0], index[1], index[2]]

                if self.optical_limit:
                    phase_cd = np.exp(2j * pi * sdisp_cd *
                                      bzk_kc[kq_k[k], :, np.newaxis])
                    for ix in range(3):
                        d_c[ix](psit2_g, dpsit_g, phase_cd)
                        tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                    rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                # PAW correction
                for a, _ in enumerate(calc.wfs.setups.id_a):
                    P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                    gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                if self.optical_limit:
                    rho_G[0] /= e2 - e1

                rho_GG = np.outer(rho_G, rho_G.conj())

                if not self.hilbert_trans:
                    for iw in range(self.Nw_local):
                        w = self.w_w[iw + self.wstart] / Hartree
                        C = df / (w + e1 - e2 + 1j * self.eta)
                        axpy(C, rho_GG, chi0_wGG[iw])
                else:
                    w0 = e2 - e1
                    scal(df, rho_GG)

                    # calculate delta function: the weight is split
                    # linearly between the two neighbouring frequency
                    # points w0_id and w0_id + 1.
                    w0_id = int(w0 / self.dw)
                    if w0_id + 1 < self.NwS:
                        # rely on the self.NwS_local is equal in each node!
                        if self.wScomm.rank == w0_id // self.NwS_local:
                            alpha = (w0_id + 1 - w0 / self.dw) / self.dw
                            axpy(alpha, rho_GG,
                                 specfunc_wGG[w0_id % self.NwS_local])

                        if self.wScomm.rank == (w0_id + 1) // self.NwS_local:
                            alpha = (w0 / self.dw - w0_id) / self.dw
                            axpy(alpha, rho_GG,
                                 specfunc_wGG[(w0_id + 1) % self.NwS_local])

            if self.nkpt == 1:
                if n == 0:
                    dt = time() - t0
                    totaltime = dt * self.nband_local
                    self.printtxt('Finished n 0 in %f seconds, estimated %f seconds left.' % (dt, totaltime))
                if rank == 0 and self.nband_local // 5 > 0:
                    if n > 0 and n % (self.nband_local // 5) == 0:
                        dt = time() - t0
                        self.printtxt('Finished n %d in %f seconds, estimated %f seconds left.' % (n, dt, totaltime - dt))

        if calc.wfs.world.size != 1:
            self.kcomm.barrier()
        if k == 0:
            dt = time() - t0
            totaltime = dt * self.nkpt_local
            self.printtxt('Finished k 0 in %f seconds, estimated %f seconds left.' % (dt, totaltime))

        if rank == 0 and self.nkpt_local // 5 > 0:
            if k > 0 and k % (self.nkpt_local // 5) == 0:
                dt = time() - t0
                self.printtxt('Finished k %d in %f seconds, estimated %f seconds left. ' % (k, dt, totaltime - dt))

    self.printtxt('Finished summation over k')

    self.kcomm.barrier()
    # Release the work arrays before the transforms below allocate more.
    del rho_GG, rho_G

    # Hilbert Transform
    if not self.hilbert_trans:
        self.kcomm.sum(chi0_wGG)
    else:
        self.kcomm.sum(specfunc_wGG)
        if self.wScomm.size == 1:
            if not self.full_hilbert_trans:
                chi0_wGG = hilbert_transform(
                    specfunc_wGG, self.Nw, self.dw,
                    self.eta)[self.wstart:self.wend]
            else:
                chi0_wGG = full_hilbert_transform(
                    specfunc_wGG, self.Nw, self.dw,
                    self.eta)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            # (``size`` and ``rank`` are module-level names here)
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                # The last rank takes the remainder.
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords,
                                              self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            if not self.full_hilbert_trans:
                chi0_Wg = hilbert_transform(
                    specfunc_Wg, self.Nw, self.dw, self.eta)[:self.Nw]
            else:
                chi0_Wg = full_hilbert_transform(
                    specfunc_Wg, self.Nw, self.dw, self.eta)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

    self.chi0_wGG = chi0_wGG / self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')

    return
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    For every spin in the spin list, every k-point in
    ``range(self.kstart, self.kend)`` and every band pair (n, m) with
    ``f_n - f_m > self.ftol``, the pair density ``rho_G`` is built and
    ``rho_G rho_G^*`` is accumulated, either directly into ``chi0_wGG`` or,
    when ``self.hilbert_trans`` is set, into a spectral function that is
    Hilbert transformed afterwards.  In the optical limit the extra
    arrays ``self.chi0G0_wGv`` and ``self.chi00G_wGv`` are filled as well.

    All results are divided by ``self.vol``; ``chi0_wGG`` is stored in
    ``self.chi0_wGG``.  Nothing is returned.

    Parameters:

    seperate_spin: int or None
        If None, loop over all ``self.nspins`` spins; otherwise only the
        given spin index is processed.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw),
                                dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros(3, dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)
        # Loop-invariant, so built once here (summation over c) instead of
        # for every band pair; the expression itself is unchanged.
        qq2_cv = [np.dot(np.diag((1, 1, 1))[ix] * self.qopt, self.bcell_cv)
                  for ix in range(3)]

        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3),
                                   dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3),
                                   dtype=complex)

        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3),
                                  dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3),
                                  dtype=complex)

    # For a (nearly) vanishing broadening only one triangle is accumulated
    # with czher and the matrix is completed after the k summation.
    use_zher = bool(self.eta < 1e-5)

    # Bound up front so the progress report cannot hit an unbound name when
    # index 0 is not handled by the rank that prints.
    totaltime = 0.0
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        if not (f_skn[spin] > self.ftol).any():
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices beyond the number of BZ k-points are padding: they
            # are evaluated at k = 0 and their rho_G is zeroed below.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T,
                                                  k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T,
                                                  k_c / N_c).T)

            # ``self.pbc == False`` is kept as a comparison (not ``not``)
            # so that it stays element-wise if pbc is an array.
            index1_g, phase1_g = kd.get_transform_wavefunction_index(
                self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(
                self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                if self.calc.wfs.world.size == 1:
                    if self.f_skn[spin][ibzkpt1, n] - self.ftol < 0:
                        continue

                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(
                        u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True,
                                                      spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g, k,
                                                        index1_g, phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(' ', k, n, m, time() - t0, file=self.txt)

                    focc = f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]
                    check_focc = focc > self.ftol

                    if not self.pwmode:
                        # Called for every m, with check_focc passed
                        # through as the third argument, as before.
                        psitold_g = self.get_wavefunction(
                            ibzkpt2, m, check_focc, spin=spin)

                    if not check_focc:
                        continue

                    if self.pwmode:
                        u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                        psitold_g = calc.wfs._get_wave_function_array(
                            u, m, realspace=True, phase=eikr2_R)

                    psit2_g = kd.transform_wave_function(
                        psitold_g, kq_k[k], index2_g, phase2_g)

                    # zero padding is included through the FFT
                    rho_g = (np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad)
                             * self.vol / self.nG0rpad)
                    # Here, planewave cutoff is applied
                    rho_G = rho_g.ravel()[self.Gindex_G]

                    if self.optical_limit:
                        phase_cd = np.exp(2j * pi * sdisp_cd *
                                          kd.bzk_kc[kq_k[k], :, np.newaxis])
                        for ix in range(3):
                            d_c[ix](psit2_g, dpsit_g, phase_cd)
                            tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                        for ix in range(3):
                            rhoG0_v[ix] = -1j * np.dot(qq2_cv[ix], tmp)

                    P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u,
                                          ibzkpt2)

                    # Only the atom index is needed; the id is unused.
                    for a, _ in enumerate(calc.wfs.setups.id_a):
                        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                        if self.optical_limit:
                            gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                    if self.optical_limit:
                        de = (self.enoshift_skn[spin][ibzkpt2, m]
                              - self.enoshift_skn[spin][ibzkpt1, n])
                        # (Nearly) degenerate pairs are dropped rather than
                        # divided by a tiny energy difference.
                        if np.abs(de) > 0.1 / Hartree:
                            rho_G[0] /= de
                            rhoG0_v /= de
                        else:
                            rho_G[0] = 0.
                            rhoG0_v[:] = 0.

                    if k_pad:
                        rho_G[:] = 0.

                    if self.optical_limit:
                        rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                        rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                        rho0G_Gv[0, :] = rhoG0_v * rhoG0_v.conj()
                        rhoG0_Gv[0, :] = rhoG0_v * rhoG0_v.conj()

                    e1 = e_skn[spin][ibzkpt1, n]
                    e2 = e_skn[spin][ibzkpt2, m]

                    if not self.hilbert_trans:
                        if not use_zher:
                            rho_GG = np.outer(rho_G, rho_G.conj())

                        for iw in range(self.Nw_local):
                            w = self.w_w[iw + self.wstart] / Hartree
                            coef = (1. / (w + e1 - e2 + 1j * self.eta)
                                    - 1. / (w - e1 + e2 + 1j * self.eta))
                            C = focc * coef

                            if use_zher:
                                czher(C.real, rho_G.conj(), chi0_wGG[iw])
                            else:
                                axpy(C, rho_GG, chi0_wGG[iw])

                                if self.optical_limit:
                                    axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                    axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])
                    else:
                        rho_GG = np.outer(rho_G, rho_G.conj())
                        w0 = e2 - e1
                        scal(focc, rho_GG)
                        if self.optical_limit:
                            scal(focc, rhoG0_Gv)
                            scal(focc, rho0G_Gv)

                        # calculate delta function: the weight is split
                        # linearly between the two neighbouring frequency
                        # points w0_id and w0_id + 1.
                        w0_id = int(w0 / self.dw)
                        if w0_id + 1 < self.NwS:
                            # rely on the self.NwS_local is equal in each
                            # node!
                            if self.wScomm.rank == w0_id // self.NwS_local:
                                alpha = (w0_id + 1 - w0 / self.dw) / self.dw
                                iloc = w0_id % self.NwS_local
                                axpy(alpha, rho_GG, specfunc_wGG[iloc])

                                if self.optical_limit:
                                    axpy(alpha, rho0G_Gv,
                                         specfunc0G_wGv[iloc])
                                    axpy(alpha, rhoG0_Gv,
                                         specfuncG0_wGv[iloc])

                            if (self.wScomm.rank
                                    == (w0_id + 1) // self.NwS_local):
                                alpha = (w0 / self.dw - w0_id) / self.dw
                                iloc = (w0_id + 1) % self.NwS_local
                                axpy(alpha, rho_GG, specfunc_wGG[iloc])

                                if self.optical_limit:
                                    axpy(alpha, rho0G_Gv,
                                         specfunc0G_wGv[iloc])
                                    axpy(alpha, rhoG0_Gv,
                                         specfuncG0_wGv[iloc])

                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt('Finished n 0 in %d seconds, estimate %d seconds left.' % (dt, totaltime))
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt('Finished n %d in %d seconds, estimate %d seconds left.' % (n, dt, totaltime - dt))

            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt('Finished k 0 in %d seconds, estimate %d seconds left.' % (dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt('Finished k %d in %d seconds, estimate %d seconds left. ' % (k, dt, totaltime - dt))

    self.printtxt('Finished summation over k')

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # czher filled one triangle only: add the conjugate transpose
            # and halve the doubled diagonal.
            assert (np.abs(chi0_wGG[0, 1:, 0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            chi0_wGG = hilbert_transform(
                specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                # The last rank takes the remainder.
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords,
                                              self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            chi0_Wg = hilbert_transform(
                specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3),
                                          dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3),
                                          dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(
                specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]

            self.chi0G0_wGv = hilbert_transform(
                specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]

    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do conjugate gradient iterations for the k-point.

    A subspace diagonalization is done first.  Then, for every band in
    ``range(self.nbands)``, at most ``self.niter`` conjugate-gradient
    steps are taken: a preconditioned search direction is built,
    orthonormalized against the bands (including the PAW overlap terms),
    and the band is rotated towards it by the angle that lowers its
    eigenvalue.  ``kpt.eps_n`` and ``kpt.P_ani`` are updated in place.

    Returns the tuple ``(total_error, psit_nG)`` where ``total_error`` is
    the weighted sum of the residual norms of all bands.
    """
    max_iter = self.niter

    phi_G = wfs.empty(q=kpt.q)
    phi_old_G = wfs.empty(q=kpt.q)

    domain_comm = wfs.gd.comm

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    R_nG = reshape(self.Htpsit_nG, psit_nG.shape)
    # Htphi_G is the first row of the residual buffer; it is taken before
    # the buffer is filled, exactly as in the original statement order.
    Htphi_G = R_nG[0]

    R_nG[:] = Htpsit_nG
    self.timer.start('Residuals')
    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)
    self.timer.stop('Residuals')

    self.timer.start('CG')

    total_error = 0.0
    for n in range(self.nbands):
        # Number of leading bands the search direction is projected
        # against.
        N = n + 1 if extra_parameters.get('PK', False) \
            else psit_nG.shape[0] + 1
        R_G = R_nG[n]
        Htpsit_G = Htpsit_nG[n]
        gamma_old = 1.0
        phi_old_G[:] = 0.0
        error = np.real(wfs.integrate(R_G, R_G))

        for nit in range(max_iter):
            if error * Hartree**2 < self.tolerance / self.nbands:
                break

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n:n + 1], kpt)

            pR_G = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

            # New search direction
            gamma = domain_comm.sum(np.vdot(pR_G, R_G).real)
            phi_G[:] = -pR_G - gamma / gamma_old * phi_old_G
            gamma_old = gamma
            phi_old_G[:] = phi_G[:]

            # Calculate projections
            P2_ai = wfs.pt.dict()
            wfs.pt.integrate(phi_G, P2_ai, kpt.q)

            # Orthonormalize phi_G to all bands
            self.timer.start('CG: orthonormalize')
            self.timer.start('CG: overlap')
            overlap_n = wfs.integrate(psit_nG[:N], phi_G,
                                      global_integral=False)
            self.timer.stop('CG: overlap')
            self.timer.start('CG: overlap2')
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                gemv(1.0, P_ni[:N].conjugate(), np.inner(dO_ii, P2_i),
                     1.0, overlap_n)
            self.timer.stop('CG: overlap2')
            domain_comm.sum(overlap_n)

            # phi_G -= overlap_n * kpt.psit_nG
            wfs.matrixoperator.gd.gemv(-1.0, psit_nG[:N], overlap_n,
                                       1.0, phi_G, 'n')

            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                gemv(-1.0, P_ni[:N], overlap_n, 1.0, P2_i, 'n')

            norm = wfs.integrate(phi_G, phi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                dO_ii = wfs.setups[a].dO_ii
                norm += np.vdot(P2_i, np.inner(dO_ii, P2_i))
            norm = domain_comm.sum(np.real(norm).item())
            root_norm = sqrt(norm)
            phi_G /= root_norm
            for P2_i in P2_ai.values():
                P2_i /= root_norm
            self.timer.stop('CG: orthonormalize')

            # find optimum linear combination of psit_G and phi_G
            an = kpt.eps_n[n]
            wfs.apply_pseudo_hamiltonian(
                kpt, hamiltonian,
                phi_G.reshape((1,) + phi_G.shape),
                Htphi_G.reshape((1,) + Htphi_G.shape))
            b = wfs.integrate(phi_G, Htpsit_G, global_integral=False)
            c = wfs.integrate(phi_G, Htphi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                b += dot(P2_i, dot(dH_ii, P_i.conj()))
                c += dot(P2_i, dot(dH_ii, P2_i.conj()))
            b = domain_comm.sum(np.real(b).item())
            c = domain_comm.sum(np.real(c).item())

            theta = 0.5 * atan2(2 * b, an - c)
            enew = (an * cos(theta)**2 +
                    c * sin(theta)**2 +
                    b * sin(2.0 * theta))
            # theta can correspond either minimum or maximum
            if (enew - kpt.eps_n[n]) > 0.0:  # we were at maximum
                theta += pi / 2.0
                enew = (an * cos(theta)**2 +
                        c * sin(theta)**2 +
                        b * sin(2.0 * theta))

            cos_theta = cos(theta)
            sin_theta = sin(theta)

            kpt.eps_n[n] = enew
            psit_nG[n] *= cos_theta
            # kpt.psit_nG[n] += sin(theta) * phi_G
            axpy(sin_theta, phi_G, psit_nG[n])
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                P_i *= cos_theta
                P_i += sin_theta * P2_i

            # The last allowed iteration skips the residual update.
            if nit < max_iter - 1:
                Htpsit_G *= cos_theta
                # Htpsit_G += sin(theta) * Htphi_G
                axpy(sin_theta, Htphi_G, Htpsit_G)
                # adjust residuals
                R_G[:] = Htpsit_G - kpt.eps_n[n] * psit_nG[n]

                coef_ai = wfs.pt.dict()
                for a, coef_i in coef_ai.items():
                    P_i = kpt.P_ani[a][n]
                    dO_ii = wfs.setups[a].dO_ii
                    dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                    coef_i[:] = (dot(P_i, dH_ii) -
                                 dot(P_i * kpt.eps_n[n], dO_ii))
                wfs.pt.add(R_G, coef_ai, kpt.q)
                error_new = np.real(wfs.integrate(R_G, R_G))
                if error_new / error < self.rtol:
                    # Relative improvement reached; note that ``error``
                    # keeps its previous value in this case.
                    break
                if (self.nbands_converge == 'occupied' and
                        kpt.f_n is not None and kpt.f_n[n] == 0.0):
                    # Bands with zero occupation get a single step only.
                    break
                error = error_new

        weight = 1.0 if kpt.f_n is None else kpt.f_n[n]
        if self.nbands_converge != 'occupied':
            weight = kpt.weight * float(n < self.nbands_converge)
        total_error += weight * error

    self.timer.stop('CG')
    return total_error, psit_nG
def calculate(self, seperate_spin=None):
    """Calculate the non-interacting density response function.

    For every spin in the spin list, every k-point in
    ``range(self.kstart, self.kend)`` and every band pair (n, m) with
    ``f_n - f_m > self.ftol``, the pair density ``rho_G`` is built and
    ``rho_G rho_G^*`` is accumulated, either directly into ``chi0_wGG`` or,
    when ``self.hilbert_trans`` is set, into a spectral function that is
    Hilbert transformed afterwards.  In the optical limit the extra
    arrays ``self.chi0G0_wGv`` and ``self.chi00G_wGv`` are filled as well.

    All results are divided by ``self.vol``; ``chi0_wGG`` is stored in
    ``self.chi0_wGG``.  Nothing is returned.

    Parameters:

    seperate_spin: int or None
        If None, loop over all ``self.nspins`` spins; otherwise only the
        given spin index is processed.
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    kq_k = self.kq_k
    f_skn = self.f_skn
    e_skn = self.e_skn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw),
                                dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros(3, dtype=complex)
        rhoG0_v = np.zeros(3, dtype=complex)
        # Loop-invariant, so built once here (summation over c) instead of
        # for every band pair; the expression itself is unchanged.
        qq2_cv = [np.dot(np.diag((1, 1, 1))[ix] * self.qopt, self.bcell_cv)
                  for ix in range(3)]

        self.chi0G0_wGv = np.zeros((self.Nw_local, self.npw, 3),
                                   dtype=complex)
        self.chi00G_wGv = np.zeros((self.Nw_local, self.npw, 3),
                                   dtype=complex)

        specfuncG0_wGv = np.zeros((self.NwS_local, self.npw, 3),
                                  dtype=complex)
        specfunc0G_wGv = np.zeros((self.NwS_local, self.npw, 3),
                                  dtype=complex)

    # For a (nearly) vanishing broadening only one triangle is accumulated
    # with czher and the matrix is completed after the k summation.
    use_zher = bool(self.eta < 1e-5)

    # Bound up front so the progress report cannot hit an unbound name when
    # index 0 is not handled by the rank that prints.
    totaltime = 0.0
    t0 = time()

    if seperate_spin is None:
        spinlist = np.arange(self.nspins)
    else:
        spinlist = [seperate_spin]

    for spin in spinlist:
        if not (f_skn[spin] > self.ftol).any():
            self.chi0_wGG = chi0_wGG
            continue

        for k in range(self.kstart, self.kend):
            # Indices beyond the number of BZ k-points are padding: they
            # are evaluated at k = 0 and their rho_G is zeroed below.
            k_pad = False
            if k >= self.kd.nbzkpts:
                k = 0
                k_pad = True

            # Find corresponding kpoint in IBZ
            ibzkpt1 = kd.bz2ibz_k[k]
            if self.optical_limit:
                ibzkpt2 = ibzkpt1
            else:
                ibzkpt2 = kd.bz2ibz_k[kq_k[k]]

            if self.pwmode:
                N_c = self.gd.N_c
                k_c = self.kd.ibzk_kc[ibzkpt1]
                eikr1_R = np.exp(2j * pi * np.dot(np.indices(N_c).T,
                                                  k_c / N_c).T)
                k_c = self.kd.ibzk_kc[ibzkpt2]
                eikr2_R = np.exp(2j * pi * np.dot(np.indices(N_c).T,
                                                  k_c / N_c).T)

            # ``self.pbc == False`` is kept as a comparison (not ``not``)
            # so that it stays element-wise if pbc is an array.
            index1_g, phase1_g = kd.get_transform_wavefunction_index(
                self.gd.N_c - (self.pbc == False), k)
            index2_g, phase2_g = kd.get_transform_wavefunction_index(
                self.gd.N_c - (self.pbc == False), kq_k[k])

            for n in range(self.nvalbands):
                if self.calc.wfs.world.size == 1:
                    if self.f_skn[spin][ibzkpt1, n] - self.ftol < 0:
                        continue

                if self.pwmode:
                    u = self.kd.get_rank_and_index(spin, ibzkpt1)[1]
                    psitold_g = calc.wfs._get_wave_function_array(
                        u, n, realspace=True, phase=eikr1_R)
                else:
                    u = None
                    psitold_g = self.get_wavefunction(ibzkpt1, n, True,
                                                      spin=spin)

                psit1new_g = kd.transform_wave_function(psitold_g, k,
                                                        index1_g, phase1_g)

                P1_ai = self.pawstuff(psit1new_g, k, n, spin, u, ibzkpt1)

                psit1_g = psit1new_g.conj() * self.expqr_g

                for m in self.mlist:
                    if self.nbands > 1000 and m % 200 == 0:
                        print(' ', k, n, m, time() - t0, file=self.txt)

                    focc = f_skn[spin][ibzkpt1, n] - f_skn[spin][ibzkpt2, m]
                    check_focc = focc > self.ftol

                    if not self.pwmode:
                        # Called for every m, with check_focc passed
                        # through as the third argument, as before.
                        psitold_g = self.get_wavefunction(
                            ibzkpt2, m, check_focc, spin=spin)

                    if not check_focc:
                        continue

                    if self.pwmode:
                        u = self.kd.get_rank_and_index(spin, ibzkpt2)[1]
                        psitold_g = calc.wfs._get_wave_function_array(
                            u, m, realspace=True, phase=eikr2_R)

                    psit2_g = kd.transform_wave_function(
                        psitold_g, kq_k[k], index2_g, phase2_g)

                    # zero padding is included through the FFT
                    rho_g = (np.fft.fftn(psit2_g * psit1_g, s=self.nGrpad)
                             * self.vol / self.nG0rpad)
                    # Here, planewave cutoff is applied
                    rho_G = rho_g.ravel()[self.Gindex_G]

                    if self.optical_limit:
                        phase_cd = np.exp(2j * pi * sdisp_cd *
                                          kd.bzk_kc[kq_k[k], :, np.newaxis])
                        for ix in range(3):
                            d_c[ix](psit2_g, dpsit_g, phase_cd)
                            tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                        for ix in range(3):
                            rhoG0_v[ix] = -1j * np.dot(qq2_cv[ix], tmp)

                    P2_ai = self.pawstuff(psit2_g, kq_k[k], m, spin, u,
                                          ibzkpt2)

                    # Only the atom index is needed; the id is unused.
                    for a, _ in enumerate(calc.wfs.setups.id_a):
                        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                        if self.optical_limit:
                            gemv(1.0, self.phiG0_avp[a], P_p, 1.0, rhoG0_v)

                    if self.optical_limit:
                        de = (self.enoshift_skn[spin][ibzkpt2, m]
                              - self.enoshift_skn[spin][ibzkpt1, n])
                        # (Nearly) degenerate pairs are dropped rather than
                        # divided by a tiny energy difference.
                        if np.abs(de) > 0.1 / Hartree:
                            rho_G[0] /= de
                            rhoG0_v /= de
                        else:
                            rho_G[0] = 0.
                            rhoG0_v[:] = 0.

                    if k_pad:
                        rho_G[:] = 0.

                    if self.optical_limit:
                        rho0G_Gv = np.outer(rho_G.conj(), rhoG0_v)
                        rhoG0_Gv = np.outer(rho_G, rhoG0_v.conj())
                        rho0G_Gv[0, :] = rhoG0_v * rhoG0_v.conj()
                        rhoG0_Gv[0, :] = rhoG0_v * rhoG0_v.conj()

                    e1 = e_skn[spin][ibzkpt1, n]
                    e2 = e_skn[spin][ibzkpt2, m]

                    if not self.hilbert_trans:
                        if not use_zher:
                            rho_GG = np.outer(rho_G, rho_G.conj())

                        for iw in range(self.Nw_local):
                            w = self.w_w[iw + self.wstart] / Hartree
                            coef = (1. / (w + e1 - e2 + 1j * self.eta)
                                    - 1. / (w - e1 + e2 + 1j * self.eta))
                            C = focc * coef

                            if use_zher:
                                czher(C.real, rho_G.conj(), chi0_wGG[iw])
                            else:
                                axpy(C, rho_GG, chi0_wGG[iw])

                                if self.optical_limit:
                                    axpy(C, rho0G_Gv, self.chi00G_wGv[iw])
                                    axpy(C, rhoG0_Gv, self.chi0G0_wGv[iw])
                    else:
                        rho_GG = np.outer(rho_G, rho_G.conj())
                        w0 = e2 - e1
                        scal(focc, rho_GG)
                        if self.optical_limit:
                            scal(focc, rhoG0_Gv)
                            scal(focc, rho0G_Gv)

                        # calculate delta function: the weight is split
                        # linearly between the two neighbouring frequency
                        # points w0_id and w0_id + 1.
                        w0_id = int(w0 / self.dw)
                        if w0_id + 1 < self.NwS:
                            # rely on the self.NwS_local is equal in each
                            # node!
                            if self.wScomm.rank == w0_id // self.NwS_local:
                                alpha = (w0_id + 1 - w0 / self.dw) / self.dw
                                iloc = w0_id % self.NwS_local
                                axpy(alpha, rho_GG, specfunc_wGG[iloc])

                                if self.optical_limit:
                                    axpy(alpha, rho0G_Gv,
                                         specfunc0G_wGv[iloc])
                                    axpy(alpha, rhoG0_Gv,
                                         specfuncG0_wGv[iloc])

                            if (self.wScomm.rank
                                    == (w0_id + 1) // self.NwS_local):
                                alpha = (w0 / self.dw - w0_id) / self.dw
                                iloc = (w0_id + 1) % self.NwS_local
                                axpy(alpha, rho_GG, specfunc_wGG[iloc])

                                if self.optical_limit:
                                    axpy(alpha, rho0G_Gv,
                                         specfunc0G_wGv[iloc])
                                    axpy(alpha, rhoG0_Gv,
                                         specfuncG0_wGv[iloc])

                if self.kd.nbzkpts == 1:
                    if n == 0:
                        dt = time() - t0
                        totaltime = dt * self.nvalbands * self.nspins
                        self.printtxt('Finished n 0 in %d seconds, estimate %d seconds left.' % (dt, totaltime))
                    if rank == 0 and self.nvalbands // 5 > 0:
                        if n > 0 and n % (self.nvalbands // 5) == 0:
                            dt = time() - t0
                            self.printtxt('Finished n %d in %d seconds, estimate %d seconds left.' % (n, dt, totaltime - dt))

            if calc.wfs.world.size != 1:
                self.kcomm.barrier()
            if k == 0:
                dt = time() - t0
                totaltime = dt * self.nkpt_local * self.nspins
                self.printtxt('Finished k 0 in %d seconds, estimate %d seconds left.' % (dt, totaltime))

            if rank == 0 and self.nkpt_local // 5 > 0:
                if k > 0 and k % (self.nkpt_local // 5) == 0:
                    dt = time() - t0
                    self.printtxt('Finished k %d in %d seconds, estimate %d seconds left. ' % (k, dt, totaltime - dt))

    self.printtxt('Finished summation over k')

    self.kcomm.barrier()

    # Hilbert Transform
    if not self.hilbert_trans:
        for iw in range(self.Nw_local):
            self.kcomm.sum(chi0_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(self.chi0G0_wGv[iw])
                self.kcomm.sum(self.chi00G_wGv[iw])

        if use_zher:
            # czher filled one triangle only: add the conjugate transpose
            # and halve the doubled diagonal.
            assert (np.abs(chi0_wGG[0, 1:, 0]) < 1e-10).all()
            for iw in range(self.Nw_local):
                chi0_wGG[iw] += chi0_wGG[iw].conj().T
                for iG in range(self.npw):
                    chi0_wGG[iw, iG, iG] /= 2.
                    assert np.abs(np.imag(chi0_wGG[iw, iG, iG])) < 1e-10
    else:
        for iw in range(self.NwS_local):
            self.kcomm.sum(specfunc_wGG[iw])
            if self.optical_limit:
                self.kcomm.sum(specfuncG0_wGv[iw])
                self.kcomm.sum(specfunc0G_wGv[iw])

        if self.wScomm.size == 1:
            chi0_wGG = hilbert_transform(
                specfunc_wGG, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            size = self.comm.size
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                # The last rank takes the remainder.
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords,
                                              self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            chi0_Wg = hilbert_transform(
                specfunc_Wg, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

            if self.optical_limit:
                specfuncG0_WGv = np.zeros((self.NwS, self.npw, 3),
                                          dtype=complex)
                specfunc0G_WGv = np.zeros((self.NwS, self.npw, 3),
                                          dtype=complex)
                self.wScomm.all_gather(specfunc0G_wGv, specfunc0G_WGv)
                self.wScomm.all_gather(specfuncG0_wGv, specfuncG0_WGv)
                specfunc0G_wGv = specfunc0G_WGv
                specfuncG0_wGv = specfuncG0_WGv

        if self.optical_limit:
            self.chi00G_wGv = hilbert_transform(
                specfunc0G_wGv, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]

            self.chi0G0_wGv = hilbert_transform(
                specfuncG0_wGv, self.w_w, self.Nw, self.dw, self.eta,
                self.full_hilbert_trans)[self.wstart:self.wend]

    if self.optical_limit:
        self.chi00G_wGv /= self.vol
        self.chi0G0_wGv /= self.vol

    self.chi0_wGG = chi0_wGG
    self.chi0_wGG /= self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')
def iterate_one_k_point(self, hamiltonian, wfs, kpt):
    """Do conjugate gradient iterations for the k-point.

    After a subspace diagonalization, every band is refined in turn: the
    preconditioned residual gives a search direction ``phi_G``, which is
    orthonormalized against ``psit_nG[:N]`` (including the ``dO_ii``
    projector terms), and the band is replaced by
    ``cos(theta) * psit + sin(theta) * phi`` with ``theta`` chosen so that
    the eigenvalue estimate does not increase.

    ``kpt.eps_n`` and ``kpt.P_ani`` are updated in place, as is the
    wave-function array returned by ``subspace_diagonalize``.

    Returns
    -------
    (total_error, psit_nG)
        Weighted sum of the per-band residual norms and the updated
        wave-function array.
    """

    niter = self.niter
    phi_G = wfs.empty(q=kpt.q)
    phi_old_G = wfs.empty(q=kpt.q)

    comm = wfs.gd.comm

    psit_nG, Htpsit_nG = self.subspace_diagonalize(hamiltonian, wfs, kpt)
    # Note that psit_nG is now in self.operator.work1_nG and
    # Htpsit_nG is in kpt.psit_nG!

    R_nG = reshape(self.Htpsit_nG, psit_nG.shape)
    # Htphi_G is taken from R_nG[0], so it is also what R_G refers to while
    # n == 0.
    # NOTE(review): whether this shares memory with R_nG depends on what
    # reshape() returns -- confirm the aliasing is intended.
    Htphi_G = R_nG[0]

    R_nG[:] = Htpsit_nG
    self.timer.start('Residuals')
    self.calculate_residuals(kpt, wfs, hamiltonian, psit_nG,
                             kpt.P_ani, kpt.eps_n, R_nG)
    self.timer.stop('Residuals')

    self.timer.start('CG')

    total_error = 0.0
    for n in range(self.nbands):
        # N limits the bands phi_G is orthogonalized against: bands 0..n
        # with the 'PK' option, otherwise every band (a slice bound past
        # the end of the array simply selects all rows).
        if extra_parameters.get('PK', False):
            N = n + 1
        else:
            N = psit_nG.shape[0] + 1
        R_G = R_nG[n]
        Htpsit_G = Htpsit_nG[n]
        gamma_old = 1.0
        phi_old_G[:] = 0.0
        error = np.real(wfs.integrate(R_G, R_G))
        for nit in range(niter):
            # Stop iterating on this band once its share of the tolerance
            # is met.
            if (error * Hartree**2 < self.tolerance / self.nbands):
                break

            ekin = self.preconditioner.calculate_kinetic_energy(
                psit_nG[n:n + 1], kpt)

            pR_G = self.preconditioner(R_nG[n:n + 1], kpt, ekin)

            # New search direction
            gamma = comm.sum(np.vdot(pR_G, R_G).real)
            phi_G[:] = -pR_G - gamma / gamma_old * phi_old_G
            gamma_old = gamma
            phi_old_G[:] = phi_G[:]

            # Calculate projections
            P2_ai = wfs.pt.dict()
            wfs.pt.integrate(phi_G, P2_ai, kpt.q)

            # Orthonormalize phi_G to all bands
            self.timer.start('CG: orthonormalize')
            self.timer.start('CG: overlap')
            overlap_n = wfs.integrate(psit_nG[:N], phi_G,
                                      global_integral=False)
            self.timer.stop('CG: overlap')
            self.timer.start('CG: overlap2')
            # Add the projector (dO_ii) contribution to the overlaps.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                gemv(1.0, P_ni[:N].conjugate(), np.inner(dO_ii, P2_i),
                     1.0, overlap_n)
            self.timer.stop('CG: overlap2')
            comm.sum(overlap_n)

            # phi_G -= overlap_n * kpt.psit_nG
            wfs.matrixoperator.gd.gemv(-1.0, psit_nG[:N], overlap_n,
                                       1.0, phi_G, 'n')
            # Apply the same projection to the projector coefficients.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                gemv(-1.0, P_ni[:N], overlap_n, 1.0, P2_i, 'n')

            norm = wfs.integrate(phi_G, phi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                dO_ii = wfs.setups[a].dO_ii
                norm += np.vdot(P2_i, np.inner(dO_ii, P2_i))
            norm = comm.sum(np.real(norm).item())
            phi_G /= sqrt(norm)
            for P2_i in P2_ai.values():
                P2_i /= sqrt(norm)
            self.timer.stop('CG: orthonormalize')

            # find optimum linear combination of psit_G and phi_G
            an = kpt.eps_n[n]
            wfs.apply_pseudo_hamiltonian(
                kpt, hamiltonian, phi_G.reshape((1, ) + phi_G.shape),
                Htphi_G.reshape((1, ) + Htphi_G.shape))
            b = wfs.integrate(phi_G, Htpsit_G, global_integral=False)
            c = wfs.integrate(phi_G, Htphi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                b += dot(P2_i, dot(dH_ii, P_i.conj()))
                c += dot(P2_i, dot(dH_ii, P2_i.conj()))
            b = comm.sum(np.real(b).item())
            c = comm.sum(np.real(c).item())

            theta = 0.5 * atan2(2 * b, an - c)
            enew = (an * cos(theta)**2 + c * sin(theta)**2 +
                    b * sin(2.0 * theta))
            # theta can correspond either minimum or maximum
            if (enew - kpt.eps_n[n]) > 0.0:  # we were at maximum
                theta += pi / 2.0
                enew = (an * cos(theta)**2 + c * sin(theta)**2 +
                        b * sin(2.0 * theta))

            kpt.eps_n[n] = enew
            psit_nG[n] *= cos(theta)
            # kpt.psit_nG[n] += sin(theta) * phi_G
            axpy(sin(theta), phi_G, psit_nG[n])
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                P_i *= cos(theta)
                P_i += sin(theta) * P2_i

            # The residual is only refreshed when another iteration will
            # follow; on the last pass `error` keeps its previous value.
            if nit < niter - 1:
                Htpsit_G *= cos(theta)
                # Htpsit_G += sin(theta) * Htphi_G
                axpy(sin(theta), Htphi_G, Htpsit_G)
                # adjust residuals
                R_G[:] = Htpsit_G - kpt.eps_n[n] * psit_nG[n]

                coef_ai = wfs.pt.dict()
                for a, coef_i in coef_ai.items():
                    P_i = kpt.P_ani[a][n]
                    dO_ii = wfs.setups[a].dO_ii
                    dH_ii = unpack(hamiltonian.dH_asp[a][kpt.s])
                    coef_i[:] = (dot(P_i, dH_ii) -
                                 dot(P_i * kpt.eps_n[n], dO_ii))
                wfs.pt.add(R_G, coef_ai, kpt.q)
                error_new = np.real(wfs.integrate(R_G, R_G))
                # Leave the loop once the residual has dropped by the
                # requested relative factor.
                if error_new / error < self.rtol:
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                # When only occupied bands must converge, one step is
                # enough for an empty band.
                if (self.nbands_converge == 'occupied' and
                        kpt.f_n is not None and kpt.f_n[n] == 0.0):
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                error = error_new

        if kpt.f_n is None:
            weight = 1.0
        else:
            weight = kpt.f_n[n]

        # With a numeric nbands_converge, only the first nbands_converge
        # bands contribute, each with the k-point weight.
        if self.nbands_converge != 'occupied':
            weight = kpt.weight * float(n < self.nbands_converge)
        total_error += weight * error
        # if nit == 3:
        #     print >> self.f, "cg:iters", n, nit+1

    self.timer.stop('CG')
    return total_error, psit_nG
# Timing comparison of three ways to contract B_pqL with Y_L.  Each section
# repeats the product `numreps` times, reports the wall time and throughput,
# and (for the first two) checks the result against the reference BY0_pq.
# The report lines are single pre-formatted strings passed to print(), so
# they are valid on Python 3 and print the same text on Python 2.

# --- gemmdot ---------------------------------------------------------------
BY2_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY2_pq.fill(0.0)
    gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
t = time.time() - t
performance = numflop * numreps / t
print('gemmdot: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY2_pq).max() < 5e-12
del BY2_pq

# --- gemv with trans='t' on the 3-index array ------------------------------
BY3_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT  : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY3_pq).max() < 5e-12
del BY3_pq

# --- gemv with trans='t' on the array flattened to 2D ----------------------
# Only timed; this section has no comparison against BY0_pq.
B_xL = B_pqL.reshape((P * Q, L))
BY4_x = np.empty(P * Q, dtype)
t = time.time()
for n in range(numreps):
    BY4_x.fill(0.0)
    gemv(1.0, B_xL, Y_L, beta, BY4_x, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT2D: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare the simple PBLAS wrappers with serial reference products.

    Random matrices are generated from ``seed``, reference results are
    computed on the master rank, the same operations are carried out on
    block-cyclically distributed copies, and the collected results are
    compared with ``equal(..., 0, tol)`` on every rank.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Imaginary unit for complex runs, zero otherwise.
    epsilon = 1.0j if dtype == complex else 0.0

    def master_descriptor(rows, cols):
        # A single block covering the whole matrix.
        return grid.new_descriptor(rows, cols, rows, cols)

    def random_matrix(desc):
        # Two draws per matrix (real part first) so the random stream is
        # consumed identically for real and complex runs.
        real_part = gen.rand(*desc.shape)
        imag_part = gen.rand(*desc.shape)
        return real_part + epsilon * imag_part

    # Create descriptors for matrices on master:
    globA = master_descriptor(M, K)
    globB = master_descriptor(K, N)
    globC = master_descriptor(M, N)
    globZ = master_descriptor(K, K)
    globX = master_descriptor(K, 1)
    globY = master_descriptor(M, 1)
    globD = master_descriptor(M, K)
    globS = master_descriptor(M, M)
    globU = master_descriptor(M, M)
    globHEC = master_descriptor(K, K)

    # Populate matrices local to master:
    A0 = random_matrix(globA)
    B0 = random_matrix(globB)
    D0 = random_matrix(globD)
    X0 = random_matrix(globX)

    # HEC = HEA * B
    HEA0 = random_matrix(globHEC)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 hermitean

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        gemm(1.0, B0, A0, 0.0, C0)  # C0 = A0 . B0
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())  # Y0 = A0 . X0
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use upper diagonal: overwrite every element with
        # column index > row index by a sentinel value.
        for row in range(sM):
            HEA0[row, row + 1:sN] = 99999.0
        if world.rank == 0:
            print(HEA0)

    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed destriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)

    # Scatter the master copies onto the distributed layouts.
    for src_desc, dst_desc, src, dst in (
            (globA, distA, A0, A),
            (globB, distB, B0, B),
            (globX, distX, X0, X),
            (globD, distD, D0, D),
            (globHEC, distHE, HEA0, HEA)):
        Redistributor(world, src_desc, dst_desc).redistribute(src, dst)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa="T")
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC,
                      uplo="L", side="L")

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    for src_desc, dst_desc, src, dst in (
            (distC, globC, C, C1),
            (distY, globY, Y, Y1),
            (distS, globS, S, S1),
            (distU, globU, U, U1),
            (distB, globB, HEC, HEC1)):
        Redistributor(world, src_desc, dst_desc).redistribute(src, dst)

    labels = ("gemm err", "gemv err", "r2k err", "rk_err", "hemm_err")
    if rank == 0:
        errors = [abs(result - reference).max()
                  for result, reference in ((C1, C0), (Y1, Y0), (S1, S0),
                                            (U1, U0), (HEC1, HEC0))]
        for label, err in zip(labels, errors):
            print(label, err)
    else:
        errors = [0.0] * len(labels)

    # We don't like exceptions on only one cpu
    errors = [world.sum(err) for err in errors]

    for err in errors:
        equal(err, 0, tol)
def gemv(self, alpha, psit_nG, C_n, beta, newpsit_G, trans='t'):
    """Helper function for CG eigensolver.

    Passes every argument through unchanged, in the same order, to a
    ``gemv`` routine and returns nothing.  ``self`` is not used.
    """
    # NOTE(review): this resolves to a module-level gemv (not to this
    # definition) only if this def sits inside a class body -- confirm.
    gemv(alpha, psit_nG, C_n, beta, newpsit_G, trans)
def gemv(self, alpha, psit_nG, C_n, beta, newpsit_G, trans='t'):
    """Forward a matrix-vector product to the ``gemv`` routine.

    Helper for the CG eigensolver.  When ``self.dtype`` is ``float`` the
    two array arguments are first reinterpreted with ``.view(float)``;
    all other arguments are passed through untouched.  Returns nothing.
    """
    matrix = psit_nG
    result = newpsit_G
    if self.dtype == float:
        matrix = matrix.view(float)
        result = result.view(float)
    gemv(alpha, matrix, C_n, beta, result, trans)
# Timing comparison of three ways to contract B_pqL with Y_L.  Each section
# repeats the product `numreps` times, reports the wall time and throughput,
# and (for the first two) checks the result against the reference BY0_pq.
# The report lines are single pre-formatted strings passed to print(), so
# they are valid on Python 3 and print the same text on Python 2.

# --- gemmdot ---------------------------------------------------------------
BY2_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY2_pq.fill(0.0)
    gemmdot(B_pqL, Y_L, 1.0, beta, BY2_pq)
t = time.time() - t
performance = numflop * numreps / t
print('gemmdot: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY2_pq).max() < 5e-12
del BY2_pq

# --- gemv with trans='t' on the 3-index array ------------------------------
BY3_pq = np.empty((P, Q), dtype)
t = time.time()
for n in range(numreps):
    BY3_pq.fill(0.0)
    gemv(1.0, B_pqL, Y_L, beta, BY3_pq, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT  : %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
assert np.abs(BY0_pq - BY3_pq).max() < 5e-12
del BY3_pq

# --- gemv with trans='t' on the array flattened to 2D ----------------------
# Only timed; this section has no comparison against BY0_pq.
B_xL = B_pqL.reshape((P * Q, L))
BY4_x = np.empty(P * Q, dtype)
t = time.time()
for n in range(numreps):
    BY4_x.fill(0.0)
    gemv(1.0, B_xL, Y_L, beta, BY4_x, 't')
t = time.time() - t
performance = numflop * numreps / t
print('gemvT2D: %8.5f s, %8.5f Mflops' % (t, performance / 1024**2.))
def density_matrix(self, n, m, k, kq=None,
                   spin1=0, spin2=0, phi_aGp=None, Gspace=True):
    """Return the pair density between band n at k and band m at kq.

    The real-space product ``conj(psit1_g) * psit2_g * expqr_g`` is
    returned directly when ``Gspace is False``.  Otherwise it is Fourier
    transformed, restricted to the indices in ``self.Gindex_G``, and
    corrected with the projector terms ``phi_aGp``.

    Parameters
    ----------
    n, m : band indices for the first and second wave function.
    k : index of the first k-point.
    kq : index of the second k-point; ``self.kq_k[k]`` when None.
    spin1, spin2 : spin indices passed to ``get_wavefunction``.
    phi_aGp : optional per-atom correction arrays; loaded or taken from
        ``self.phi_aGp`` when None.
    Gspace : return the real-space product when this is exactly False.
    """

    gd = self.gd
    kd = self.kd

    optical_limit = False
    if kq is None:
        # Use the q-dependent quantities prepared on the object.
        kq = self.kq_k[k]
        expqr_g = self.expqr_g
        q_v = self.qq_v
        optical_limit = self.optical_limit
        q_c = self.q_c
    else:
        q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
        # Wrap each component back into roughly (-0.5, 0.5].
        q_c[np.where(q_c>0.501)] -= 1
        q_c[np.where(q_c<-0.499)] += 1

        # A (numerically) zero difference is treated as the optical limit
        # and uses the q stored on the object instead.
        if (np.abs(q_c) < self.ftol).all():
            optical_limit = True
            q_c = self.q_c
        q_v = np.dot(q_c, self.bcell_cv)
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        qr_g = gemmdot(q_v, r_vg, beta=0.0)
        expqr_g = np.exp(-1j * qr_g)
        if optical_limit:
            expqr_g = 1

    ibzkpt1 = kd.bz2ibz_k[k]
    ibzkpt2 = kd.bz2ibz_k[kq]

    psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin1)
    psit1_g = kd.transform_wave_function(psitold_g, k)

    psitold_g = self.get_wavefunction(ibzkpt2, m, True, spin=spin2)
    psit2_g = kd.transform_wave_function(psitold_g, kq)

    if Gspace is False:
        return psit1_g.conj() * psit2_g * expqr_g
    else:
        tmp_g = psit1_g.conj()* psit2_g * expqr_g
        # zero padding is included through the FFT
        rho_g = np.fft.fftn(tmp_g, s=self.nGrpad) * self.vol / self.nG0rpad

        # Here, planewave cutoff is applied
        rho_G = rho_g.ravel()[self.Gindex_G]

        if optical_limit:
            # Replace the first component by -i q . <psi1| grad |psi2>;
            # it is divided by the energy difference at the end.
            dpsit_g = gd.empty(dtype=complex)
            tmp = np.zeros((3), dtype=complex)
            phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                              kd.bzk_kc[kq, :, np.newaxis])
            for ix in range(3):
                self.d_c[ix](psit2_g, dpsit_g, phase_cd)
                tmp[ix] = gd.integrate(psit1_g.conj() * dpsit_g)
            rho_G[0] = -1j * np.dot(q_v, tmp)

        calc = self.calc
        pt = self.pt
        if not self.pwmode:
            if calc.wfs.world.size > 1 or kd.nbzkpts == 1:
                # Integrate the projectors against the transformed
                # wave functions directly.
                P1_ai = pt.dict()
                pt.integrate(psit1_g, P1_ai, k)
                P2_ai = pt.dict()
                pt.integrate(psit2_g, P2_ai, kq)
            else:
                P1_ai = self.get_P_ai(k, n, spin1)
                P2_ai = self.get_P_ai(kq, m, spin2)
        else:
            # first calculate P_ai at ibzkpt, then rotate to k
            u = self.kd.get_rank_and_index(spin1, ibzkpt1)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[n], Ptmp_ai, ibzkpt1)
            P1_ai = self.get_P_ai(k, n, spin1, Ptmp_ai)

            u = self.kd.get_rank_and_index(spin2, ibzkpt2)[1]
            Ptmp_ai = pt.dict()
            kpt = calc.wfs.kpt_u[u]
            pt.integrate(kpt.psit_nG[m], Ptmp_ai, ibzkpt2)
            P2_ai = self.get_P_ai(kq, m, spin2, Ptmp_ai)

        if phi_aGp is None:
            # Any AttributeError raised in the try body (for example a
            # missing self.mode) falls back to self.phi_aGp.
            # NOTE(review): when self.mode == 'RPA' nothing is assigned
            # here and phi_aGp stays None -- confirm callers always pass
            # phi_aGp in that mode.
            try:
                if not self.mode == 'RPA':
                    if optical_limit:
                        iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                    else:
                        iq = kd.where_is_q(q_c, self.bzq_qc)
                        assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8
                    phi_aGp = self.load_phi_aGp(self.reader, iq)  #phi_qaGp[iq]
            except AttributeError:
                phi_aGp = self.phi_aGp

        # Add, per atom, phi_Gp applied to the flattened outer product of
        # the two projection vectors.
        for a, id in enumerate(self.calc.wfs.setups.id_a):
            P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
            phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
            gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

        if optical_limit:
            if n==m:
                rho_G[0] = 1.
            elif np.abs(self.e_skn[spin2][ibzkpt2, m] -
                        self.e_skn[spin1][ibzkpt1, n]) < 1e-5:
                # (Near-)degenerate pair: avoid dividing by ~0.
                rho_G[0] = 0.
            else:
                rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                             self.enoshift_skn[spin1][ibzkpt1, n])

        return rho_G
def iterate_one_k_point(self, ham, wfs, kpt):
    """Do conjugate gradient iterations for the k-point.

    After a subspace diagonalization, every band is refined in turn: the
    preconditioned residual gives a search direction ``phi_G``, which is
    orthonormalized against ``psit.array[:N]`` (including the ``dO_ii``
    projector terms), and the band is replaced by
    ``cos(theta) * psit + sin(theta) * phi`` with ``theta`` chosen so that
    the eigenvalue estimate does not increase.

    ``kpt.eps_n``, ``kpt.P_ani`` and the wave-function array are updated
    in place.  When ``self.tw_coeff`` is set, ``ham.vt_sG`` and
    ``ham.dH_asp`` are divided by it on entry and multiplied back (together
    with ``kpt.eps_n``) before returning.

    Returns
    -------
    total_error
        Weighted sum of the per-band residual norms.
    """
    self.timer.start('CG')

    niter = self.niter

    phi_G, phi_old_G, Htphi_G = wfs.empty(3, q=kpt.q)

    comm = wfs.gd.comm

    if self.tw_coeff:
        # Wait!  What business does the eigensolver have changing
        # the properties of the Hamiltonian?  We are not updating
        # the Hamiltonian here.  Moreover, what is supposed to
        # happen if this function is called multiple times per
        # iteration?  Then we keep dividing the potential by the
        # same number.  What on earth is the meaning of this?
        #
        # Also the parameter tw_coeff is undocumented.  What is it?
        ham.vt_sG /= self.tw_coeff
        # Assuming the ordering in dH_asp and wfs is the same
        for a in ham.dH_asp.keys():
            ham.dH_asp[a] /= self.tw_coeff

    psit = kpt.psit
    R = psit.new(buf=wfs.work_array)
    P = kpt.projections
    P2 = P.new()

    self.subspace_diagonalize(ham, wfs, kpt)

    Htpsit = psit.new(buf=self.Htpsit_nG)

    R.array[:] = Htpsit.array
    self.calculate_residuals(kpt, wfs, ham, psit, P, kpt.eps_n, R, P2)

    total_error = 0.0
    for n in range(self.nbands):
        # N limits the bands phi_G is orthogonalized against: bands 0..n
        # with the 'PK' option, otherwise all bands.
        if extra_parameters.get('PK', False):
            N = n + 1
        else:
            N = self.nbands
        R_G = R.array[n]
        Htpsit_G = Htpsit.array[n]
        psit_G = psit.array[n]
        gamma_old = 1.0
        phi_old_G[:] = 0.0
        error = np.real(wfs.integrate(R_G, R_G))
        for nit in range(niter):
            # Stop iterating on this band once its share of the tolerance
            # is met.
            if (error * Hartree**2 < self.tolerance / self.nbands):
                break

            ekin = self.preconditioner.calculate_kinetic_energy(psit_G,
                                                                kpt)

            pR_G = self.preconditioner(R_G, kpt, ekin)

            # New search direction
            gamma = comm.sum(np.vdot(pR_G, R_G).real)
            phi_G[:] = -pR_G - gamma / gamma_old * phi_old_G
            gamma_old = gamma
            phi_old_G[:] = phi_G[:]

            # Calculate projections
            P2_ai = wfs.pt.dict()
            wfs.pt.integrate(phi_G, P2_ai, kpt.q)

            # Orthonormalize phi_G to all bands
            self.timer.start('CG: orthonormalize')
            self.timer.start('CG: overlap')
            overlap_n = wfs.integrate(psit.array[:N], phi_G,
                                      global_integral=False)
            self.timer.stop('CG: overlap')
            self.timer.start('CG: overlap2')
            # Add the projector (dO_ii) contribution to the overlaps.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                dO_ii = wfs.setups[a].dO_ii
                overlap_n += np.dot(P_ni[:N].conjugate(),
                                    np.dot(dO_ii, P2_i))
            self.timer.stop('CG: overlap2')
            comm.sum(overlap_n)

            # phi_G -= overlap_n . psit[:N]; both arrays are passed as
            # views of dtype wfs.dtype.
            gemv(-1.0, psit.array[:N].view(wfs.dtype), overlap_n,
                 1.0, phi_G.view(wfs.dtype), 'n')

            # Apply the same projection to the projector coefficients.
            for a, P2_i in P2_ai.items():
                P_ni = kpt.P_ani[a]
                P2_i -= np.dot(overlap_n, P_ni[:N])

            norm = wfs.integrate(phi_G, phi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                dO_ii = wfs.setups[a].dO_ii
                norm += np.vdot(P2_i, np.dot(dO_ii, P2_i))
            norm = comm.sum(float(np.real(norm)))
            phi_G /= sqrt(norm)
            for P2_i in P2_ai.values():
                P2_i /= sqrt(norm)
            self.timer.stop('CG: orthonormalize')

            # find optimum linear combination of psit_G and phi_G
            an = kpt.eps_n[n]
            wfs.apply_pseudo_hamiltonian(kpt, ham,
                                         phi_G.reshape((1,) + phi_G.shape),
                                         Htphi_G.reshape((1,) +
                                                         Htphi_G.shape))
            b = wfs.integrate(phi_G, Htpsit_G, global_integral=False)
            c = wfs.integrate(phi_G, Htphi_G, global_integral=False)
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                dH_ii = unpack(ham.dH_asp[a][kpt.s])
                b += dot(P2_i, dot(dH_ii, P_i.conj()))
                c += dot(P2_i, dot(dH_ii, P2_i.conj()))
            b = comm.sum(float(np.real(b)))
            c = comm.sum(float(np.real(c)))

            theta = 0.5 * atan2(2 * b, an - c)
            enew = (an * cos(theta)**2 +
                    c * sin(theta)**2 +
                    b * sin(2.0 * theta))
            # theta can correspond either minimum or maximum
            if (enew - kpt.eps_n[n]) > 0.0:  # we were at maximum
                theta += pi / 2.0
                enew = (an * cos(theta)**2 +
                        c * sin(theta)**2 +
                        b * sin(2.0 * theta))

            kpt.eps_n[n] = enew
            psit_G *= cos(theta)
            # kpt.psit_nG[n] += sin(theta) * phi_G
            axpy(sin(theta), phi_G, psit_G)
            for a, P2_i in P2_ai.items():
                P_i = kpt.P_ani[a][n]
                P_i *= cos(theta)
                P_i += sin(theta) * P2_i

            # The residual is only refreshed when another iteration will
            # follow; on the last pass `error` keeps its previous value.
            if nit < niter - 1:
                Htpsit_G *= cos(theta)
                # Htpsit_G += sin(theta) * Htphi_G
                axpy(sin(theta), Htphi_G, Htpsit_G)
                # adjust residuals
                R_G[:] = Htpsit_G - kpt.eps_n[n] * psit_G

                coef_ai = wfs.pt.dict()
                for a, coef_i in coef_ai.items():
                    P_i = kpt.P_ani[a][n]
                    dO_ii = wfs.setups[a].dO_ii
                    dH_ii = unpack(ham.dH_asp[a][kpt.s])
                    coef_i[:] = (dot(P_i, dH_ii) -
                                 dot(P_i * kpt.eps_n[n], dO_ii))
                wfs.pt.add(R_G, coef_ai, kpt.q)
                error_new = np.real(wfs.integrate(R_G, R_G))
                # Leave the loop once the residual has dropped by the
                # requested relative factor.
                if error_new / error < self.rtol:
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                # When only occupied bands must converge, one step is
                # enough for an empty band.
                if (self.nbands_converge == 'occupied' and
                        kpt.f_n is not None and kpt.f_n[n] == 0.0):
                    # print >> self.f, "cg:iters", n, nit+1
                    break
                error = error_new

        if kpt.f_n is None:
            weight = 1.0
        else:
            weight = kpt.f_n[n]

        # With a numeric nbands_converge, only the first nbands_converge
        # bands contribute, each with the k-point weight.
        if self.nbands_converge != 'occupied':
            weight = kpt.weight * float(n < self.nbands_converge)
        total_error += weight * error
        # if nit == 3:
        #     print >> self.f, "cg:iters", n, nit+1

    if self.tw_coeff:  # undo the scaling for calculating energies
        for i in range(len(kpt.eps_n)):
            kpt.eps_n[i] *= self.tw_coeff
        ham.vt_sG *= self.tw_coeff
        # Assuming the ordering in dH_asp and wfs is the same
        for a in ham.dH_asp.keys():
            ham.dH_asp[a] *= self.tw_coeff

    self.timer.stop('CG')
    return total_error
def density_matrix(self, n, m, k, kq=None,
                   spin1=0, spin2=0, phi_aGp=None, Gspace=True):
    """Return the pair density between band n at k and band m at kq.

    With ``Gspace is False`` the real-space product
    ``conj(psit1_g) * psit2_g * expqr_g`` is returned.  Otherwise the
    product is Fourier transformed, restricted to ``self.Gindex_G`` and
    corrected with the per-atom ``phi_aGp`` terms; in the optical limit
    the first component is built from the gradient of the second wave
    function and divided by the energy difference of the two states.

    ``kq`` defaults to ``self.kq_k[k]``; ``phi_aGp`` is loaded or taken
    from ``self.phi_aGp`` when not supplied.
    """
    gd = self.gd
    kd = self.kd

    if kq is None:
        # Use the q-dependent quantities prepared on the object.
        kq = self.kq_k[k]
        expqr_g = self.expqr_g
        q_v = self.qq_v
        optical_limit = self.optical_limit
        q_c = self.q_c
    else:
        q_c = kd.bzk_kc[kq] - kd.bzk_kc[k]
        # Wrap each component back into roughly (-0.5, 0.5].
        q_c[q_c > 0.501] -= 1
        q_c[q_c < -0.499] += 1

        # A (numerically) zero difference is the optical limit and uses
        # the q stored on the object instead.
        optical_limit = bool((np.abs(q_c) < self.ftol).all())
        if optical_limit:
            q_c = self.q_c
        q_v = np.dot(q_c, self.bcell_cv)
        r_vg = gd.get_grid_point_coordinates()  # (3, nG)
        expqr_g = np.exp(-1j * gemmdot(q_v, r_vg, beta=0.0))
        if optical_limit:
            expqr_g = 1

    ibzkpt1 = kd.bz2ibz_k[k]
    ibzkpt2 = kd.bz2ibz_k[kq]

    psit1_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt1, n, True, spin=spin1), k)
    psit2_g = kd.transform_wave_function(
        self.get_wavefunction(ibzkpt2, m, True, spin=spin2), kq)

    pair_g = psit1_g.conj() * psit2_g * expqr_g
    if Gspace is False:
        return pair_g

    # zero padding is included through the FFT
    rho_g = np.fft.fftn(pair_g, s=self.nGrpad) * self.vol / self.nG0rpad
    # Here, planewave cutoff is applied
    rho_G = rho_g.ravel()[self.Gindex_G]

    if optical_limit:
        dpsit_g = gd.empty(dtype=complex)
        grad_v = np.zeros((3), dtype=complex)
        phase_cd = np.exp(2j * pi * gd.sdisp_cd *
                          kd.bzk_kc[kq, :, np.newaxis])
        for axis in range(3):
            self.d_c[axis](psit2_g, dpsit_g, phase_cd)
            grad_v[axis] = gd.integrate(psit1_g.conj() * dpsit_g)
        rho_G[0] = -1j * np.dot(q_v, grad_v)

    calc = self.calc
    pt = self.pt

    def rotated_projections(bzkpt, ibzkpt, band, spin):
        # first calculate P_ai at ibzkpt, then rotate to k
        u = self.kd.get_rank_and_index(spin, ibzkpt)[1]
        Ptmp_ai = pt.dict()
        kpt = calc.wfs.kpt_u[u]
        pt.integrate(kpt.psit_nG[band], Ptmp_ai, ibzkpt)
        return self.get_P_ai(bzkpt, band, spin, Ptmp_ai)

    if self.pwmode:
        P1_ai = rotated_projections(k, ibzkpt1, n, spin1)
        P2_ai = rotated_projections(kq, ibzkpt2, m, spin2)
    elif calc.wfs.world.size > 1 or kd.nbzkpts == 1:
        P1_ai = pt.dict()
        pt.integrate(psit1_g, P1_ai, k)
        P2_ai = pt.dict()
        pt.integrate(psit2_g, P2_ai, kq)
    else:
        P1_ai = self.get_P_ai(k, n, spin1)
        P2_ai = self.get_P_ai(kq, m, spin2)

    if phi_aGp is None:
        # Any AttributeError raised while loading falls back to the
        # arrays stored on the object.
        try:
            if self.mode != 'RPA':
                if optical_limit:
                    iq = kd.where_is_q(np.zeros(3), self.bzq_qc)
                else:
                    iq = kd.where_is_q(q_c, self.bzq_qc)
                    assert np.abs(self.bzq_qc[iq] - q_c).sum() < 1e-8
                phi_aGp = self.load_phi_aGp(self.reader, iq)
        except AttributeError:
            phi_aGp = self.phi_aGp

    # Per-atom correction: phi_Gp applied to the flattened outer product
    # of the two projection vectors, accumulated into rho_G.
    for a, _ in enumerate(self.calc.wfs.setups.id_a):
        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
        phi_Gp = np.ascontiguousarray(phi_aGp[a], complex)
        gemv(1.0, phi_Gp, P_p, 1.0, rho_G)

    if optical_limit:
        if n == m:
            rho_G[0] = 1.
        else:
            gap = (self.e_skn[spin2][ibzkpt2, m] -
                   self.e_skn[spin1][ibzkpt1, n])
            if np.abs(gap) < 1e-5:
                # (Near-)degenerate pair: avoid dividing by ~0.
                rho_G[0] = 0.
            else:
                rho_G[0] /= (self.enoshift_skn[spin2][ibzkpt2, m] -
                             self.enoshift_skn[spin1][ibzkpt1, n])

    return rho_G
def calculate(self, spin=0):
    """Calculate the non-interacting density response function.

    Pair-density outer products ``rho_G rho_G'^*`` are accumulated over the
    local k-points (``kstart..kend``), the local bands n
    (``nstart..nend``) and all bands m.  They are added either directly
    onto the local frequency grid or, when ``self.hilbert_trans`` is set,
    into a spectral function that is Hilbert transformed afterwards.  The
    result divided by ``self.vol`` is stored in ``self.chi0_wGG``.

    Parameters
    ----------
    spin : int
        Spin index passed to ``self.get_wavefunction``.

    Returns
    -------
    None
    """
    calc = self.calc
    kd = self.kd
    gd = self.gd
    sdisp_cd = gd.sdisp_cd
    ibzk_kc = self.ibzk_kc
    bzk_kc = self.bzk_kc
    kq_k = self.kq_k
    pt = self.pt
    f_kn = self.f_kn
    e_kn = self.e_kn

    # Matrix init
    chi0_wGG = np.zeros((self.Nw_local, self.npw, self.npw), dtype=complex)
    # Nothing is occupied: the response is identically zero.
    if not (f_kn > self.ftol).any():
        self.chi0_wGG = chi0_wGG
        return

    if self.hilbert_trans:
        specfunc_wGG = np.zeros((self.NwS_local, self.npw, self.npw),
                                dtype=complex)

    # Prepare for the derivative of pseudo-wavefunction
    if self.optical_limit:
        d_c = [Gradient(gd, i, n=4, dtype=complex).apply for i in range(3)]
        dpsit_g = gd.empty(dtype=complex)
        tmp = np.zeros((3), dtype=complex)

    rho_G = np.zeros(self.npw, dtype=complex)
    # Bound up front: rho_GG is otherwise only assigned when some (n, m)
    # pair passes the occupation check, and the `del` after the loops
    # would raise UnboundLocalError on a rank where none does.
    rho_GG = None
    t0 = time()
    t_get_wfs = 0
    for k in range(self.kstart, self.kend):

        # Find corresponding kpoint in IBZ
        ibzkpt1 = kd.kibz_k[k]
        if self.optical_limit:
            ibzkpt2 = ibzkpt1
        else:
            ibzkpt2 = kd.kibz_k[kq_k[k]]

        for n in range(self.nstart, self.nend):
            # print >> self.txt, k, n, t_get_wfs, time() - t0
            t1 = time()
            psitold_g = self.get_wavefunction(ibzkpt1, n, True, spin=spin)
            t_get_wfs += time() - t1
            psit1new_g = kd.transform_wave_function(psitold_g, k)

            P1_ai = pt.dict()
            pt.integrate(psit1new_g, P1_ai, k)

            psit1_g = psit1new_g.conj() * self.expqr_g

            for m in range(self.nbands):

                # The Hilbert-transform path only keeps pairs with
                # f_n > f_m; the direct path keeps both signs.
                if self.hilbert_trans:
                    check_focc = (f_kn[ibzkpt1, n] -
                                  f_kn[ibzkpt2, m]) > self.ftol
                else:
                    check_focc = np.abs(f_kn[ibzkpt1, n] -
                                        f_kn[ibzkpt2, m]) > self.ftol

                t1 = time()
                psitold_g = self.get_wavefunction(ibzkpt2, m, check_focc,
                                                  spin=spin)
                t_get_wfs += time() - t1

                if check_focc:
                    psit2_g = kd.transform_wave_function(
                        psitold_g, kq_k[k])
                    P2_ai = pt.dict()
                    pt.integrate(psit2_g, P2_ai, kq_k[k])

                    # fft
                    tmp_g = np.fft.fftn(
                        psit2_g * psit1_g) * self.vol / self.nG0

                    for iG in range(self.npw):
                        index = self.Gindex_G[iG]
                        rho_G[iG] = tmp_g[index[0], index[1], index[2]]

                    if self.optical_limit:
                        phase_cd = np.exp(2j * pi * sdisp_cd *
                                          bzk_kc[kq_k[k], :, np.newaxis])
                        for ix in range(3):
                            d_c[ix](psit2_g, dpsit_g, phase_cd)
                            tmp[ix] = gd.integrate(psit1_g * dpsit_g)
                        rho_G[0] = -1j * np.dot(self.qq_v, tmp)

                    # PAW correction
                    for a, _ in enumerate(calc.wfs.setups.id_a):
                        P_p = np.outer(P1_ai[a].conj(), P2_ai[a]).ravel()
                        gemv(1.0, self.phi_aGp[a], P_p, 1.0, rho_G)

                    if self.optical_limit:
                        rho_G[0] /= e_kn[ibzkpt2, m] - e_kn[ibzkpt1, n]

                    rho_GG = np.outer(rho_G, rho_G.conj())

                    if not self.hilbert_trans:
                        for iw in range(self.Nw_local):
                            w = self.w_w[iw + self.wstart] / Hartree
                            C = (f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]) / (
                                w + e_kn[ibzkpt1, n] - e_kn[ibzkpt2, m] +
                                1j * self.eta)
                            axpy(C, rho_GG, chi0_wGG[iw])
                    else:
                        focc = f_kn[ibzkpt1, n] - f_kn[ibzkpt2, m]
                        w0 = e_kn[ibzkpt2, m] - e_kn[ibzkpt1, n]
                        scal(focc, rho_GG)

                        # calculate delta function: the weight is split
                        # linearly between the two grid points that
                        # bracket w0.
                        w0_id = int(w0 / self.dw)
                        if w0_id + 1 < self.NwS:
                            # rely on the self.NwS_local is equal in each
                            # node!
                            if self.wScomm.rank == w0_id // self.NwS_local:
                                alpha = (w0_id + 1 -
                                         w0 / self.dw) / self.dw
                                axpy(alpha, rho_GG,
                                     specfunc_wGG[w0_id % self.NwS_local])

                            if (self.wScomm.rank ==
                                    (w0_id + 1) // self.NwS_local):
                                alpha = (w0 / self.dw - w0_id) / self.dw
                                axpy(alpha, rho_GG,
                                     specfunc_wGG[(w0_id + 1) %
                                                  self.NwS_local])

                        # deltaw = delta_function(w0, self.dw, self.NwS,
                        #                         self.sigma)
                        # for wi in range(self.NwS_local):
                        #     if deltaw[wi + self.wS1] > 1e-8:
                        #         specfunc_wGG[wi] += (tmp_GG *
                        #                              deltaw[wi + self.wS1])

            # Progress report per band when there is a single k-point.
            if self.nkpt == 1:
                if n == 0:
                    dt = time() - t0
                    totaltime = dt * self.nband_local
                    self.printtxt(
                        'Finished n 0 in %f seconds, estimated %f seconds left.'
                        % (dt, totaltime))
                if rank == 0 and self.nband_local // 5 > 0:
                    if n > 0 and n % (self.nband_local // 5) == 0:
                        dt = time() - t0
                        self.printtxt(
                            'Finished n %d in %f seconds, estimated %f seconds left.'
                            % (n, dt, totaltime - dt))

        if calc.wfs.world.size != 1:
            self.kcomm.barrier()
        if k == 0:
            dt = time() - t0
            totaltime = dt * self.nkpt_local
            self.printtxt(
                'Finished k 0 in %f seconds, estimated %f seconds left.'
                % (dt, totaltime))

        if rank == 0 and self.nkpt_local // 5 > 0:
            if k > 0 and k % (self.nkpt_local // 5) == 0:
                dt = time() - t0
                self.printtxt(
                    'Finished k %d in %f seconds, estimated %f seconds left. '
                    % (k, dt, totaltime - dt))
    self.printtxt('Finished summation over k')

    self.kcomm.barrier()
    # Release the work arrays before the transforms below.
    del rho_GG, rho_G

    # Hilbert Transform
    if not self.hilbert_trans:
        self.kcomm.sum(chi0_wGG)
    else:
        self.kcomm.sum(specfunc_wGG)
        if self.wScomm.size == 1:
            if not self.full_hilbert_trans:
                chi0_wGG = hilbert_transform(
                    specfunc_wGG, self.Nw, self.dw,
                    self.eta)[self.wstart:self.wend]
            else:
                chi0_wGG = full_hilbert_transform(
                    specfunc_wGG, self.Nw, self.dw,
                    self.eta)[self.wstart:self.wend]
            self.printtxt('Finished hilbert transform !')
            del specfunc_wGG
        else:
            # redistribute specfunc_wGG to all nodes
            assert self.NwS % size == 0
            NwStmp1 = (rank % self.kcomm.size) * self.NwS // size
            NwStmp2 = (rank % self.kcomm.size + 1) * self.NwS // size
            specfuncnew_wGG = specfunc_wGG[NwStmp1:NwStmp2]
            del specfunc_wGG

            # Number of flattened (G, G') elements owned by each rank of
            # wcomm; the last rank takes the remainder.
            coords = np.zeros(self.wcomm.size, dtype=int)
            nG_local = self.npw**2 // self.wcomm.size
            if self.wcomm.rank == self.wcomm.size - 1:
                nG_local = self.npw**2 - (self.wcomm.size - 1) * nG_local
            self.wcomm.all_gather(np.array([nG_local]), coords)

            specfunc_Wg = SliceAlongFrequency(specfuncnew_wGG, coords,
                                              self.wcomm)
            self.printtxt('Finished Slice Along Frequency !')
            if not self.full_hilbert_trans:
                chi0_Wg = hilbert_transform(specfunc_Wg, self.Nw, self.dw,
                                            self.eta)[:self.Nw]
            else:
                chi0_Wg = full_hilbert_transform(specfunc_Wg, self.Nw,
                                                 self.dw,
                                                 self.eta)[:self.Nw]
            self.printtxt('Finished hilbert transform !')
            self.comm.barrier()
            del specfunc_Wg

            chi0_wGG = SliceAlongOrbitals(chi0_Wg, coords, self.wcomm)
            self.printtxt('Finished Slice along orbitals !')
            self.comm.barrier()
            del chi0_Wg

    self.chi0_wGG = chi0_wGG / self.vol

    self.printtxt('')
    self.printtxt('Finished chi0 !')

    return
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare the simple PBLAS wrappers with serial reference products.

    Random matrices are generated from ``seed``, reference results are
    computed on the master rank, the same operations are carried out on
    block-cyclically distributed copies, and the collected results are
    compared with ``equal(..., 0, tol)`` on every rank.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Imaginary unit for complex runs, zero otherwise.
    epsilon = 1.0j if dtype == complex else 0.0

    def master_descriptor(rows, cols):
        # A single block covering the whole matrix.
        return grid.new_descriptor(rows, cols, rows, cols)

    def random_matrix(desc):
        # Two draws per matrix (real part first) so the random stream is
        # consumed identically for real and complex runs.
        real_part = gen.rand(*desc.shape)
        imag_part = gen.rand(*desc.shape)
        return real_part + epsilon * imag_part

    # Create descriptors for matrices on master:
    globA = master_descriptor(M, K)
    globB = master_descriptor(K, N)
    globC = master_descriptor(M, N)
    globZ = master_descriptor(K, K)
    globX = master_descriptor(K, 1)
    globY = master_descriptor(M, 1)
    globD = master_descriptor(M, K)
    globS = master_descriptor(M, M)
    globU = master_descriptor(M, M)
    globHEC = master_descriptor(K, K)

    # Populate matrices local to master:
    A0 = random_matrix(globA)
    B0 = random_matrix(globB)
    D0 = random_matrix(globD)
    X0 = random_matrix(globX)

    # HEC = HEA * B
    HEA0 = random_matrix(globHEC)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make H0 hermitean
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        gemm(1.0, B0, A0, 0.0, C0)  # C0 = A0 . B0
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())  # Y0 = A0 . X0
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use upper diagonal: overwrite every element with
        # column index > row index by a sentinel value.
        for row in range(sM):
            HEA0[row, row + 1:sN] = 99999.0
        if world.rank == 0:
            print(HEA0)

    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed destriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)

    # Scatter the master copies onto the distributed layouts.
    for src_desc, dst_desc, src, dst in (
            (globA, distA, A0, A),
            (globB, distB, B0, B),
            (globX, distX, X0, X),
            (globD, distD, D0, D),
            (globHEC, distHE, HEA0, HEA)):
        Redistributor(world, src_desc, dst_desc).redistribute(src, dst)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC,
                      uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    for src_desc, dst_desc, src, dst in (
            (distC, globC, C, C1),
            (distY, globY, Y, Y1),
            (distS, globS, S, S1),
            (distU, globU, U, U1),
            (distB, globB, HEC, HEC1)):
        Redistributor(world, src_desc, dst_desc).redistribute(src, dst)

    labels = ('gemm err', 'gemv err', 'r2k err', 'rk_err', 'hemm_err')
    if rank == 0:
        errors = [abs(result - reference).max()
                  for result, reference in ((C1, C0), (Y1, Y0), (S1, S0),
                                            (U1, U0), (HEC1, HEC0))]
        for label, err in zip(labels, errors):
            print(label, err)
    else:
        errors = [0.0] * len(labels)

    # We don't like exceptions on only one cpu
    errors = [world.sum(err) for err in errors]

    for err in errors:
        equal(err, 0, tol)
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    """Compare the simple PBLAS wrappers with serial BLAS reference results.

    Random input matrices are generated, reference products are computed
    on rank 0 with the serial routines (gemm, gemv, r2k, rk), the inputs
    are redistributed onto block-cyclic layouts, the same operations are
    run through the ``pblas_simple_*`` wrappers, and the results are
    gathered back and compared against the references.

    Parameters:

    M, N, K: int
        Matrix dimensions. A and D are M x K, B is K x N, C is M x N,
        Z is K x K, X is K x 1, Y is M x 1, S and U are M x M.
    seed: int
        Seed for the ``np.random.RandomState`` used to fill the inputs.
    mprocs, nprocs: int
        Forwarded to ``BlacsGrid(world, mprocs, nprocs)``
        (presumably the process grid shape -- confirm against BlacsGrid).
    dtype: type
        ``float`` or ``complex``; for ``complex`` the inputs get a random
        imaginary part.

    The maximum absolute deviations are checked with ``equal(err, 0, tol)``.
    """
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    # Weight of the imaginary part of the random input matrices.
    epsilon = 1.0j if dtype == complex else 0.0

    def random_matrix(desc):
        # Always draw twice (real part first, then imaginary part), also
        # for real dtype, so the random stream does not depend on dtype.
        return gen.rand(*desc.shape) + epsilon * gen.rand(*desc.shape)

    # Create descriptors for matrices on master
    # (one single block spanning the whole matrix):
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    # Populate matrices local to master
    # (generation order A, B, D, X must be kept for reproducibility):
    A0 = random_matrix(globA)
    B0 = random_matrix(globB)
    D0 = random_matrix(globD)
    X0 = random_matrix(globX)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates

    # Local reference matrix product:
    if rank == 0:
        # i.e. C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        print(A0.shape, Z0.shape)
        # NOTE(review): Z0 (and the distributed Z below) are computed but
        # never compared in the error checks at the end of this function.
        Z0[:] = np.dot(A0.T, A0)
        # i.e. Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates

    # Scatter the inputs from the master layout to the distributed layout.
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)

    # Run the operations under test on the distributed matrices.
    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0

    # We don't like exceptions on only one cpu: summing over all ranks
    # gives every rank the same error value, so the checks below pass or
    # fail on all ranks together.
    gemm_err = world.sum(gemm_err)
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)