def modified_cholesky_direct(M, tol=1e-5, verbose=False, cmax=20): """Modified cholesky decomposition of matrix. See, e.g. [Motta17]_ Parameters ---------- M : :class:`numpy.ndarray` Positive semi-definite, symmetric matrix. tol : float Accuracy desired. Optional. Default : False. verbose : bool If true print out convergence progress. Optional. Default : False. cmax : int Number of cholesky vectors to store N_gamma = cmax M. Returns ------- chol_vecs : :class:`numpy.ndarray` Matrix of cholesky vectors. """ # matrix of residuals. delta = numpy.copy(M.diagonal()) nchol_max = int(cmax * M.shape[0]**0.5) # index of largest diagonal element of residual matrix. nu = numpy.argmax(numpy.abs(delta)) delta_max = delta[nu] if verbose: print("# max number of cholesky vectors = %d" % nchol_max) header = ['iteration', 'max_residual', 'time'] print(format_fixed_width_strings(header)) init = [delta_max] print('{:17d} '.format(0) + format_fixed_width_floats(init)) # print ("# iteration %d: delta_max = %f"%(0, delta_max.real)) # Store for current approximation to input matrix. Mapprox = numpy.zeros(M.shape[0], dtype=M.dtype) chol_vecs = numpy.zeros((nchol_max, M.shape[0]), dtype=M.dtype) nchol = 0 chol_vecs[0] = numpy.copy(M[:, nu]) / delta_max**0.5 while abs(delta_max) > tol: # Update cholesky vector start = time.time() Mapprox += chol_vecs[nchol] * chol_vecs[nchol].conj() delta = M.diagonal() - Mapprox nu = numpy.argmax(numpy.abs(delta)) delta_max = numpy.abs(delta[nu]) nchol += 1 Munu0 = numpy.dot(chol_vecs[:nchol, nu].conj(), chol_vecs[:nchol, :]) chol_vecs[nchol] = (M[:, nu] - Munu0) / (delta_max)**0.5 if verbose: step_time = time.time() - start out = [delta_max, step_time] print('{:17d} '.format(nchol) + format_fixed_width_floats(out)) return numpy.array(chol_vecs[:nchol])
def chunked_cholesky(mol, max_error=1e-6, verbose=False, cmax=10): """Modified cholesky decomposition from pyscf eris. See, e.g. [Motta17]_ Only works for molecular systems. Parameters ---------- mol : :class:`pyscf.mol` pyscf mol object. orthoAO: :class:`numpy.ndarray` Orthogonalising matrix for AOs. (e.g., mo_coeff). delta : float Accuracy desired. verbose : bool If true print out convergence progress. cmax : int nchol = cmax * M, where M is the number of basis functions. Controls buffer size for cholesky vectors. Returns ------- chol_vecs : :class:`numpy.ndarray` Matrix of cholesky vectors in AO basis. """ nao = mol.nao_nr() diag = numpy.zeros(nao*nao) nchol_max = cmax * nao # This shape is more convenient for pauxy. chol_vecs = numpy.zeros((nchol_max, nao*nao)) # eri = numpy.zeros((nao,nao,nao,nao)) ndiag = 0 dims = [0] nao_per_i = 0 for i in range(0,mol.nbas): l = mol.bas_angular(i) nc = mol.bas_nctr(i) nao_per_i += (2*l+1)*nc dims.append(nao_per_i) start = time.time() for i in range(0,mol.nbas): shls = (i,i+1,0,mol.nbas,i,i+1,0,mol.nbas) buf = mol.intor('int2e_sph', shls_slice=shls) di, dk, dj, dl = buf.shape diag[ndiag:ndiag+di*nao] = buf.reshape(di*nao,di*nao).diagonal() ndiag += di * nao nu = numpy.argmax(diag) delta_max = diag[nu] if verbose: print(" # Generating Cholesky decomposition of ERIs."%nchol_max) print(" # max number of cholesky vectors = %d"%nchol_max) header = ['iteration', 'max_residual', 'time'] print(format_fixed_width_strings(header)) init = [delta_max, time.time()-start] print('{:17d} '.format(0)+format_fixed_width_floats(init)) j = nu // nao l = nu % nao sj = numpy.searchsorted(dims, j) sl = numpy.searchsorted(dims, l) if dims[sj] != j and j != 0: sj -= 1 if dims[sl] != l and l != 0: sl -= 1 Mapprox = numpy.zeros(nao*nao) # ERI[:,jl] eri_col = mol.intor('int2e_sph', shls_slice=(0,mol.nbas,0,mol.nbas,sj,sj+1,sl,sl+1)) cj, cl = max(j-dims[sj],0), max(l-dims[sl],0) chol_vecs[0] = numpy.copy(eri_col[:,:,cj,cl].reshape(nao*nao)) / delta_max**0.5 nchol = 0 while abs(delta_max) > max_error: # Update cholesky vector start = time.time() # M'_ii = \sum_x L_i^x L_i^x Mapprox += chol_vecs[nchol] * chol_vecs[nchol] # D_ii = M_ii - M'_ii delta = diag - Mapprox nu = numpy.argmax(numpy.abs(delta)) delta_max = numpy.abs(delta[nu]) # Compute ERI chunk. # shls_slice computes shells of integrals as determined by the angular # momentum of the basis function and the number of contraction # coefficients. Need to search for AO index within this shell indexing # scheme. # AO index. j = nu // nao l = nu % nao # Associated shell index. sj = numpy.searchsorted(dims, j) sl = numpy.searchsorted(dims, l) if dims[sj] != j and j != 0: sj -= 1 if dims[sl] != l and l != 0: sl -= 1 # Compute ERI chunk. eri_col = mol.intor('int2e_sph', shls_slice=(0,mol.nbas,0,mol.nbas,sj,sj+1,sl,sl+1)) # Select correct ERI chunk from shell. cj, cl = max(j-dims[sj],0), max(l-dims[sl],0) Munu0 = eri_col[:,:,cj,cl].reshape(nao*nao) # Updated residual = \sum_x L_i^x L_nu^x R = numpy.dot(chol_vecs[:nchol+1,nu], chol_vecs[:nchol+1,:]) chol_vecs[nchol+1] = (Munu0 - R) / (delta_max)**0.5 nchol += 1 if verbose: step_time = time.time() - start out = [delta_max, step_time] print('{:17d} '.format(nchol)+format_fixed_width_floats(out)) return chol_vecs[:nchol]
def run(self, comm, Xaoik, Xaolj, part, kpts, nmo_pk, nmo_max, Qi, gmap): # Setup residual matrix. ngs = Xaoik.shape[1] nkpts = len(kpts) t0 = time.clock() residual, k1max, k2max, i1max, i2max, maxv = (self.generate_diagonal( Xaoik, Xaolj, part, nmo_pk)) t1 = time.clock() if part.rank == 0 and self.verbose: print(" # Time to generate diagonal (initial residual):" " {:.2e}".format(t1 - t0)) sys.stdout.flush() comm.Allgather( numpy.array([k1max, k2max, i1max, i2max, maxv], dtype=numpy.float64), self.maxres_buff) vmax = 0 k3, k4, i3, i4, vmax = self.find_k3k4(comm.size) try: done = numpy.zeros((nkpts, nkpts, nmo_max, nmo_max), numpy.int32) maxresidual = numpy.zeros(part.maxvecs, dtype=numpy.float64) cholvecs = numpy.zeros((part.nkk, part.nij, part.maxvecs), dtype=numpy.complex128) Xkl = numpy.zeros(ngs, dtype=numpy.complex128) Xkl0 = numpy.zeros(ngs + part.maxvecs, dtype=numpy.complex128) Vbuff = numpy.zeros(part.maxvecs, dtype=numpy.complex128) except MemoryError: print(" # Problems allocating memory for auxiliary structures for " "Cholesky solver.") done[k3, k4, i3, i4] = 1 tstart = time.clock() if comm.rank == 0: sys.stdout.flush() more = True # for parallel numv = 0 if comm.rank == 0: header = [ "iteration", "max_residual", "total_time", "time_k3k4", "time_comp_cholv", "time_buff" ] if self.verbose: print(format_fixed_width_strings(header)) while more: t0 = time.clock() # stop condition if numv >= part.maxvecs: print(" Too many vectors needed to converge. Increase maximum " "number of vectors.") break kkmax = k3 * nkpts + k4 if comm.size <= nkpts: ipr = bisect(part.kkbounds[1:comm.size + 1], kkmax) if (kkmax >= part.kk0) & (kkmax < part.kkN): assert (comm.rank == ipr) else: i34 = i3 * nmo_pk[k4] + i4 for i in range(part.nproc_pk): ij0_, ijN_ = fair_share(nmo_max * nmo_max, part.nproc_pk, i) if i34 < ijN_: ipr = k3 * part.nproc_pk + i break # bcast Xaolj/CV[k3,k4,i34,0:numv] if comm.rank == ipr: assert (((i3 * nmo_pk[k4] + i4) >= part.ij0) & ((i3 * nmo_pk[k4] + i4) < part.ijN)) Xkl0[0:ngs] = Xaolj[kkmax - part.kk0, 0:ngs, i3 * nmo_pk[k4] + i4 - part.ij0] Xkl0[ngs:ngs + numv] = (cholvecs[kkmax - part.kk0, i3 * nmo_pk[k4] + i4 - part.ij0, 0:numv]) Vbuff[0:numv] = (cholvecs[kkmax - part.kk0, i3 * nmo_pk[k4] + i4 - part.ij0, 0:numv]) comm.Bcast(Xkl0[0:ngs + numv], root=ipr) else: comm.Bcast(Xkl0[0:ngs + numv], root=ipr) Vbuff[0:numv] = Xkl0[ngs:ngs + numv] # add new Cholesky vector # 1. evaluate new column (ik|imax,kmax) t1 = time.clock() tadd = 0.0 for k in range(part.nkk): k1 = part.n2k1[k] k2 = part.n2k2[k] if k3 == self.kconserv[k1, k2, k4]: q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4] if numpy.sum(abs(q1)) > 1e-9: t_ = time.clock() ip = -1 for ii in range(27): if numpy.sum( numpy.linalg.norm(q1 - Qi[ii, :])) < 1e-12: ip = ii break if ip < 0: print(" # Could not find Q: {} {} ".format(q1, Qi)) sys.exit() for ix in range(ngs): Xkl[ix] = Xkl0[gmap[ip, ix]] tadd += time.clock() - t_ else: Xkl[0:ngs] = Xkl0[0:ngs] n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0 cholvecs[k, 0:n_, numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj()) t2 = time.clock() # 2. subtract projection along previous components cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv], Vbuff[0:numv].conj()) cholvecs[:, :, numv] /= math.sqrt(vmax) # update residual residual -= (cholvecs[:, :, numv] * cholvecs[:, :, numv].conj()).real maxv = 0 for k in range(part.nkk): k1 = part.n2k1[k] k2 = part.n2k2[k] for ij in range(part.nij): if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]: break if abs(residual[k, ij]) > maxv: maxv = abs(residual[k, ij]) k1max = k1 k2max = k2 i1max = (ij + part.ij0) // nmo_pk[k2] i2max = (ij + part.ij0) % nmo_pk[k2] t3 = time.clock() # assemble full CV on head node comm.Allgather( numpy.array([k1max, k2max, i1max, i2max, maxv], dtype=numpy.float64), self.maxres_buff) k3, k4, i3, i4, vmax = self.find_k3k4(comm.size) t4 = time.clock() # only root keeps track of residual and I/O if part.rank == 0: maxresidual[numv] = abs(vmax) # print and evaluate stop condition output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0] if self.verbose: print("{:17d} ".format(numv) + format_fixed_width_floats(output)) # print("{:8d} {:13.8e}".format(numv, vmax)) tstart = time.clock() if numv % 100 == 0: sys.stdout.flush() numv += 1 if vmax < self.gtol_chol: more = False if (done[k3, k4, i3, i4] > 0) and more: print("Error in Cholesky decomposition. " "done[imax,kmax]>0.", k3, k4, i3, i4, vmax) sys.exit() done[k3, k4, i3, i4] = 1 t6 = time.clock() return cholvecs[:, :, :numv]
def run(self, comm, X, h5file): # Unpack for convenience. ngs = self.ngs nmo_max = self.nmo_max nkpts = self.nkpts part = self.part nmo_pk = self.nmo_pk QKToK2 = self.QKToK2 kpts = self.kpts # Setup residual matrix. mem = 2 * 16 * nkpts * ngs * nmo_max**2 / 1024**3 if self.verbose and comm.rank == 0: print(" # Approx total memory required for orbital products: " "{:.2e} GB.".format(mem)) mem_verbose = comm.rank == 0 and self.verbose > 1 if mem_verbose: print(" # Approx memory per MPI task for auxiliary structures.") Xaoik = alloc_helper((part.nkk, ngs, part.nij), dtype=numpy.complex128, name='Xaoik', verbose=mem_verbose) Xaolj = alloc_helper((part.nkk, ngs, part.nij), dtype=numpy.complex128, name='Xaolj', verbose=mem_verbose) done = alloc_helper((nkpts, nmo_max, nmo_max), numpy.int32, name='done', verbose=mem_verbose) maxresidual = alloc_helper((part.maxvecs, ), dtype=numpy.float64, name='maxresidual', verbose=mem_verbose) cholvecs = alloc_helper((part.nkk, part.nij, part.maxvecs), dtype=numpy.complex128, name='cholvecs', verbose=mem_verbose) Xkl = alloc_helper((ngs, ), dtype=numpy.complex128, name='xkl', verbose=mem_verbose) Xkl0 = alloc_helper((ngs + part.maxvecs, ), dtype=numpy.complex128, name='xkl0', verbose=mem_verbose) Vbuff = alloc_helper((part.maxvecs, ), dtype=numpy.complex128, name='Vbuff', verbose=mem_verbose) num_cholvecs = alloc_helper((nkpts, ), dtype=numpy.int32, name='num_cholvecs', verbose=mem_verbose) header = [ "iteration", "max_residual", "total_time", "time_k3k4", "time_comp_cholv", "time_buff" ] for Q in range(nkpts): t0 = time.clock() if Q > self.kminus[Q]: continue if comm.rank == 0 and self.verbose: print( " # Calculating factorization for momentum: {}".format(Q)) print(" # Generating orbital products") sys.stdout.flush() t1 = time.clock() maxresidual[:] = 0 done[:, :, :] = 0 cholvecs[:, :, :] = 0 start = time.time() self.generate_orbital_products(Q, X, Xaoik, Xaolj) if self.verbose and comm.rank == 0: print(" # Time to generate orbital products: " "{:13.8e}".format(time.time() - start)) residual, k1max, k2max, i1max, i2max, maxv = ( self.generate_diagonal(Q, Xaoik, Xaolj)) buff = numpy.array([k1max, k2max, i1max, i2max, maxv], dtype=numpy.float64) comm.Allgather(buff, self.maxres_buff) k3, k4, i3, i4, vmax = self.find_k3k4(comm.size) done[k3, i3, i4] = 1 tstart = time.clock() if comm.rank == 0: sys.stdout.flush() cnt = 0 vmaxold = vmax more = True # for parallel numv = 0 while more: t0 = time.clock() # stop condition if comm.rank == 0 and self.verbose: if numv == 0: print(format_fixed_width_strings(header)) if numv >= self.maxvecs: print(" Too many vectors needed to converge. " "Increase maximum number of vectors.") break if comm.size <= nkpts: ipr = bisect(part.kkbounds[1:comm.size + 1], k3) if (k3 >= part.kk0) and (k3 < part.kkN): assert (comm.rank == ipr) else: i34 = i3 * nmo_pk[k4] + i4 for i in range(part.nproc_pk): ij0_, ijN_ = fair_share(nmo_max * nmo_max, part.nproc_pk, i) if i34 < ijN_: ipr = k3 * part.nproc_pk + i break if comm.rank == ipr: assert ((i3 * nmo_pk[k4] + i4 >= part.ij0)) assert ((i3 * nmo_pk[k4] + i4 < part.ijN)) Xkl0[0:ngs] = Xaolj[k3 - part.kk0, 0:ngs, i3 * nmo_pk[k4] + i4 - part.ij0] Xkl0[ngs:ngs + numv] = cholvecs[k3 - part.kk0, i3 * nmo_pk[k4] + i4 - part.ij0, 0:numv] Vbuff[0:numv] = cholvecs[k3 - part.kk0, i3 * nmo_pk[k4] + i4 - part.ij0, 0:numv] comm.Bcast(Xkl0[0:ngs + numv], root=ipr) else: comm.Bcast(Xkl0[0:ngs + numv], root=ipr) Vbuff[0:numv] = Xkl0[ngs:ngs + numv] # add new Cholesky vector # 1. evaluate new column (ik|imax,kmax) t1 = time.clock() tadd = 0.0 for k in range(part.nkk): k1 = k + part.kk0 k2 = QKToK2[Q][k1] if part.ij0 > nmo_pk[k1] * nmo_pk[k2]: continue if numpy.sum(abs(kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4])) > 1e-9: t_ = time.clock() q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4] ip = -1 for ii in range(27): if numpy.sum( numpy.linalg.norm(q1 - self.Qi[ii, :])) < 1e-12: ip = ii break if ip < 0: print("Could not find Q: {} {}".format( q1, self.Qi)) sys.exit() for ix in range(ngs): Xkl[ix] = Xkl0[self.gmap[ip, ix]] tadd += time.clock() - t_ else: Xkl[0:ngs] = Xkl0[0:ngs] n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0 cholvecs[k, 0:n_, numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj()) t2 = time.clock() # 2. substract projection along previous components cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv], Vbuff[0:numv].conj()) cholvecs[:, :, numv] /= math.sqrt(vmax) # update residual residual -= (cholvecs[:, :, numv] * cholvecs[:, :, numv].conj()).real maxv = 0 k1max = -1 k2max = -1 i1max = -1 i2max = -1 for k in range(part.nkk): k1 = k + part.kk0 k2 = self.QKToK2[Q][k1] # if ij0 > nmo_pk[k1]*nmo_pk[k2]: # continue for ij in range(part.nij): if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]: break if abs(residual[k, ij]) > maxv: maxv = abs(residual[k, ij]) k1max = k1 k2max = k2 i1max = (ij + part.ij0) // nmo_pk[k2] i2max = (ij + part.ij0) % nmo_pk[k2] t3 = time.clock() # assemble full CV on head node buff = numpy.array([k1max, k2max, i1max, i2max, maxv], dtype=numpy.float64) comm.Allgather(buff, self.maxres_buff) k3, k4, i3, i4, vmax = self.find_k3k4(comm.size) t4 = time.clock() # only root keeps track of residual and I/O if comm.rank == 0: maxresidual[numv] = abs(vmax) # print and evaluate stop condition output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0] if self.verbose: print("{:17d} ".format(numv) + format_fixed_width_floats(output)) tstart = time.clock() if numv % 100 == 0: sys.stdout.flush() numv += 1 if vmax < self.gtol_chol: more = False if done[k3, i3, i4] > 0 and more: # TODO: What is this? print( "Error in Cholesky decomposition. " "done[imax,kmax]>0.", k3, k4, i3, i4, vmax) sys.exit() done[k3, i3, i4] = 1 t6 = time.clock() comm.barrier() num_cholvecs[Q] = numv if h5file.phdf or comm.rank == 0: LQ = h5file.grp_v2.create_dataset( "L" + str(Q), (nkpts, nmo_max * nmo_max * numv, 2), dtype=numpy.float64) # cholvecs[nkk,nij,maxvecs] for kk in range(part.nkk): T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy()) T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0) LQ[kk + part.kk0, part.ij0 * numv:part.ijN * numv, :] = T_ T_ = None else: Ldim = h5file.grp_v2.create_dataset( "Ldim" + str(Q), data=numpy.array([ part.nkk, part.nij, part.kk0, part.ij0, part.ijN, numv ], dtype=numpy.int32)) LQ = h5file.grp_v2.create_dataset( "L" + str(Q), (part.nkk, part.nij * numv, 2), dtype=numpy.float64) # cholvecs[nkk,nij,maxvecs] for kk in range(part.nkk): T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy()) T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0) LQ[kk, :, :] = T_ T_ = None comm.barrier() h5file.grp.create_dataset("NCholPerKP", data=num_cholvecs) comm.barrier()