def main(nbands=1000, mprocs=2, mb=64):
    """Check ScaLAPACK ``diagonalize_dc`` eigenvalues against ``diagonalize``.

    A test matrix with ``beta`` on the diagonal and ``alpha`` below it is
    diagonalized twice: once as a BLACS-distributed array and once as a
    replicated NumPy array.  The largest absolute difference between the
    two eigenvalue sets is printed on rank 0 and asserted to be below the
    module-level ``tol``.

    Parameters
    ----------
    nbands : int
        Dimension of the square test matrix.
    mprocs : int
        Passed as both grid-shape arguments of ``BlacsGrid``.
    mb : int
        Passed as both block-size arguments of ``new_descriptor``.
    """
    # Matrix definition shared by the distributed and the replicated setup.
    alpha = 0.1  # off-diagonal
    beta = 75.0  # diagonal
    uplo = 'L'  # lower-triangular

    # BLACS grid and the descriptor of the nbands x nbands array.
    blacs_grid = BlacsGrid(world, mprocs, mprocs)
    desc = blacs_grid.new_descriptor(nbands, nbands, mb, mb)

    # Outside the BlacsGrid the two distributed arrays are of size zero.
    dist_H = desc.empty(dtype=float)
    dist_C = desc.empty(dtype=float)
    # Replicated array on all MPI tasks.
    eigs_scalapack = np.empty((nbands), dtype=float)

    # Fill the ScaLAPACK array, then clear the opposite triangle.
    scalapack_set(desc, dist_H, alpha, beta, uplo)
    scalapack_zero(desc, dist_H, switch_uplo[uplo])

    # The functional interface scalapack_diagonalize_dc(...) would work as
    # well; the descriptor method is the recommended one.
    tic = time()
    desc.diagonalize_dc(dist_H.copy(), dist_C, eigs_scalapack)
    toc = time()

    world.broadcast(eigs_scalapack, 0)  # all MPI tasks now have the result
    world.barrier()  # wait for everyone to finish
    if rank == 0:
        print('ScaLAPACK diagonalize_dc', toc - tic)

    # Same matrix as a replicated NumPy array.
    unit_diag = np.eye(nbands, dtype=float)
    strict_lower = np.tril(np.ones((nbands, nbands)), -1)
    dense_H = beta * unit_diag + alpha * strict_lower
    eigs_lapack = np.empty((nbands), dtype=float)

    tic = time()
    diagonalize(dense_H, eigs_lapack)
    toc = time()
    if rank == 0:
        print('LAPACK diagonalize', toc - tic)

    delta = abs(eigs_lapack - eigs_scalapack).max()
    if rank == 0:
        print(delta)
    assert delta < tol
def main(nbands=1000, mprocs=2, mb=64):
    """Diagonalize one test matrix with ScaLAPACK and with LAPACK and compare.

    The matrix has ``beta`` on the diagonal and ``alpha`` in the strictly
    lower triangle.  Timings of both solvers and the maximum absolute
    eigenvalue deviation are printed on rank 0; the deviation must be
    smaller than the module-level ``tol``.

    Parameters
    ----------
    nbands : int
        Number of rows and columns of the test matrix.
    mprocs : int
        Given twice to ``BlacsGrid`` (after the communicator).
    mb : int
        Given twice to ``new_descriptor`` (after the matrix dimensions).
    """
    on_master = rank == 0

    # --- Distributed (ScaLAPACK) part -----------------------------------
    grid = BlacsGrid(world, mprocs, mprocs)
    nndesc = grid.new_descriptor(nbands, nbands, mb, mb)

    # Both distributed arrays are of size zero outside the BlacsGrid.
    H_dist = nndesc.empty(dtype=float)
    C_dist = nndesc.empty(dtype=float)
    eps_dist = np.empty((nbands), dtype=float)  # replicated on every task

    alpha, beta = 0.1, 75.0  # off-diagonal, diagonal
    uplo = 'L'  # lower-triangular
    scalapack_set(nndesc, H_dist, alpha, beta, uplo)
    scalapack_zero(nndesc, H_dist, switch_uplo[uplo])

    # scalapack_diagonalize_dc(nndesc, ...) is an equivalent interface;
    # the method on the descriptor is the recommended one.
    started = time()
    nndesc.diagonalize_dc(H_dist.copy(), C_dist, eps_dist)
    finished = time()

    world.broadcast(eps_dist, 0)  # every MPI task now holds eps_dist
    world.barrier()  # wait for everyone to finish
    if on_master:
        print('ScaLAPACK diagonalize_dc', finished - started)

    # --- Replicated (LAPACK) part ---------------------------------------
    H_ref = (beta * np.eye(nbands, dtype=float)
             + alpha * np.tril(np.ones((nbands, nbands)), -1))
    eps_ref = np.empty((nbands), dtype=float)

    started = time()
    diagonalize(H_ref, eps_ref)
    finished = time()
    if on_master:
        print('LAPACK diagonalize', finished - started)

    # --- Comparison -----------------------------------------------------
    delta = abs(eps_ref - eps_dist).max()
    if on_master:
        print(delta)
    assert delta < tol
def main(seed=42, dtype=float):
    """Run BLACS diagonalization and inverse Cholesky on a test matrix.

    For each of the two operations an ``(nbands, mynbands)`` input is
    allocated, filled via ``scalapack_set`` where ``ksl.Nndescriptor`` is
    truthy, and printed together with the results via ``parallelprint``.

    Parameters
    ----------
    seed : int
        Not used by this function.
    dtype : type
        Element type of all arrays except the eigenvalue array.
    """
    ksl = BlacsBandLayouts(gd, bd, block_comm, dtype, mcpus, ncpus, blocksize)
    nbands, mynbands = bd.nbands, bd.mynbands

    def make_input_Nn():
        # We would *not* create such a matrix in the real-space code this
        # way; this is just for testing purposes.
        # Note after MPI_Reduce, only meaningful information on gd masters.
        matrix_Nn = np.zeros((nbands, mynbands), dtype=dtype)
        if ksl.Nndescriptor:  # hack
            scalapack_set(ksl.Nndescriptor, matrix_Nn, 0.1, 75.0, 'L')
        else:
            assert gd.comm.rank != 0
        return matrix_Nn

    def show(label, array):
        print(label)
        parallelprint(world, array)

    # Diagonalize
    H_Nn = make_input_Nn()
    U_nN = np.empty((mynbands, nbands), dtype=dtype)
    show("H_Nn", H_Nn)

    eps_n = np.zeros(bd.mynbands)
    blacs_diagonalize(ksl, H_Nn, U_nN, eps_n)
    show("U_nN", U_nN)
    show("eps_n", eps_n)

    # Inverse Cholesky
    S_Nn = make_input_Nn()
    C_nN = np.empty((mynbands, nbands), dtype=dtype)
    show("S_Nn", S_Nn)

    blacs_inverse_cholesky(ksl, S_Nn, C_nN)
    show("C_nN", C_nN)
def main(seed=42, dtype=float):
    """Run BLACS diagonalization and inverse Cholesky on a test matrix.

    Two ``(nbands, mynbands)`` inputs are built the same way (filled by
    ``scalapack_set`` where ``ksl.Nndescriptor`` is truthy), passed to
    ``blacs_diagonalize`` and ``blacs_inverse_cholesky`` respectively, and
    all inputs and outputs are printed with ``parallelprint``.

    Parameters
    ----------
    seed : int
        Not used by this function.
    dtype : type
        Element type of all arrays except the eigenvalue array.
    """
    ksl = BlacsBandLayouts(gd, bd, dtype, mcpus, ncpus, blocksize)
    nbands = bd.nbands
    mynbands = bd.mynbands

    # Diagonalize
    # We would *not* create H_Nn in the real-space code this way.
    # This is just for testing purposes.
    # Note after MPI_Reduce, only meaningful information on gd masters
    H_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    U_nN = np.empty((mynbands, nbands), dtype=dtype)

    if ksl.Nndescriptor:  # hack
        scalapack_set(ksl.Nndescriptor, H_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    # print() with a single string argument produces the same output on
    # Python 2 and Python 3, unlike the former print statements, which are
    # a SyntaxError on Python 3.
    print("H_Nn")
    parallelprint(world, H_Nn)

    eps_n = np.zeros(bd.mynbands)
    blacs_diagonalize(ksl, H_Nn, U_nN, eps_n)
    print("U_nN")
    parallelprint(world, U_nN)
    print("eps_n")
    parallelprint(world, eps_n)

    # Inverse Cholesky
    S_Nn = np.zeros((nbands, mynbands), dtype=dtype)
    C_nN = np.empty((mynbands, nbands), dtype=dtype)

    if ksl.Nndescriptor:  # hack
        scalapack_set(ksl.Nndescriptor, S_Nn, 0.1, 75.0, 'L')
    else:
        assert gd.comm.rank != 0

    print("S_Nn")
    parallelprint(world, S_Nn)
    blacs_inverse_cholesky(ksl, S_Nn, C_nN)
    print("C_nN")
    parallelprint(world, C_nN)
def test_trivial_cholesky(self):
    """Verify inverse-Cholesky orthonormalization with the identity operator.

    The overlap matrix ``S_nn`` of the wave functions is shifted by
    ``alpha`` times the identity, its inverse Cholesky factor is applied
    to the wave functions, and the resulting overlap matrix is collected
    and broadcast to all ranks.  It is then compared through
    ``check_and_plot`` with ``I - alpha * C0^dag C0``, where ``C0`` is
    computed with NumPy from ``self.S0_nn``.

    Note that ``self.psit_nG`` is replaced by the transformed wave
    functions.
    """
    # Set up Hermitian overlap operator:
    def S(x):
        return x

    def dS(a, P_ni):
        return np.dot(P_ni, self.setups[a].dO_ii)

    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``async`` is a reserved keyword since Python 3.7, so the attribute
    # can no longer be spelled ``self.async``; look it up by name instead.
    use_async = getattr(self, 'async')
    overlap = MatrixOperator(self.ksl, nblocks, use_async, True)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS)

    # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
    I_nn = self.ksl.nndescriptor.empty(dtype=S_nn.dtype)
    scalapack_set(self.ksl.nndescriptor, I_nn, 0.0, 1.0, 'L')
    alpha = 1e-3  # shift eigenvalues away from zero

    C_nn = S_nn + alpha * I_nn
    self.ksl.nndescriptor.inverse_cholesky(C_nn, 'L')
    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani,
                                             S, dS)

    D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (self.bd.nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower..
    else:
        # Every other rank got an empty array; allocate a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((self.bd.nbands,) * 2, dtype=D_NN.dtype)

    # Distribute the full matrix: first over the domain communicator of
    # band rank 0, then over the band communicator.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_NN = C_NN^dag * S_NN * C_NN = I_NN - alpha * C_NN^dag * C_NN
    I_NN = np.eye(self.bd.nbands)
    C0_NN = np.linalg.inv(
        np.linalg.cholesky(self.S0_nn + alpha * I_NN).T.conj())
    D0_NN = I_NN - alpha * np.dot(C0_NN.T.conj(), C0_NN)
    # XXX precision (third argument; presumably number of digits)
    self.check_and_plot(D_NN, D0_NN, 6, 'trivial,cholesky')
def test_trivial_cholesky(self):
    """Test the inverse Cholesky transformation for the trivial operator.

    Builds ``S_nn + alpha * I`` from the wave-function overlap, applies
    ``inverse_cholesky`` and ``matrix_multiply`` to obtain transformed
    wave functions (stored back in ``self.psit_nG``), and recomputes the
    overlap matrix.  After gathering and broadcasting it as ``D_NN``, the
    result is checked with ``check_and_plot`` against the NumPy reference
    ``I - alpha * C0^dag C0`` derived from ``self.S0_nn``.
    """
    # Set up Hermitian overlap operator:
    S = lambda x: x
    dS = lambda a, P_ni: np.dot(P_ni, self.setups[a].dO_ii)
    nblocks = self.get_optimal_number_of_blocks(self.blocking)
    # ``self.async`` is a SyntaxError on Python >= 3.7 because ``async``
    # became a reserved keyword; getattr() reads the same attribute on
    # every Python version.
    overlap = MatrixOperator(self.ksl, nblocks, getattr(self, 'async'), True)
    S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    # Known starting point of SI_nn = <psit_m|S+alpha*I|psit_n>
    nndesc = self.ksl.nndescriptor
    I_nn = nndesc.empty(dtype=S_nn.dtype)
    scalapack_set(nndesc, I_nn, 0.0, 1.0, 'L')
    alpha = 1e-3  # shift eigenvalues away from zero

    C_nn = S_nn + alpha * I_nn
    nndesc.inverse_cholesky(C_nn, 'L')
    self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
    D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S, dS)

    nbands = self.bd.nbands
    D_NN = nndesc.collect_on_master(D_nn)
    if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
        assert D_NN.shape == (nbands,) * 2
        D_NN = D_NN.T.copy()  # Fortran -> C indexing
        tri2full(D_NN, 'U')  # upper to lower..
    else:
        # Non-master ranks hold an empty array and need a receive buffer.
        assert D_NN.nbytes == 0
        D_NN = np.empty((nbands,) * 2, dtype=D_NN.dtype)

    # Broadcast within the domain communicator on band rank 0 first, then
    # across the band communicator so every rank ends up with D_NN.
    if self.bd.comm.rank == 0:
        self.gd.comm.broadcast(D_NN, 0)
    self.bd.comm.broadcast(D_NN, 0)

    # D_NN = C_NN^dag * S_NN * C_NN = I_NN - alpha * C_NN^dag * C_NN
    I_NN = np.eye(nbands)
    C0_NN = np.linalg.inv(np.linalg.cholesky(self.S0_nn + alpha * I_NN).T.conj())
    D0_NN = I_NN - alpha * np.dot(C0_NN.T.conj(), C0_NN)
    self.check_and_plot(D_NN, D0_NN, 6, 'trivial,cholesky')  # XXX precision
def linear_propagator(self, sourceC_nM, targetC_nM, S_MM, H_MM, dt):
    """Propagate coefficients one step by a linear solve; fill ``targetC_nM``.

    Without BLACS the result is
    ``solve(S - 0.5j*H*dt, (S + 0.5j*H*dt) . source^H)^H``.
    With BLACS the same two steps (matrix product, then linear solve) are
    carried out on block-distributed arrays, after which the result is
    redistributed and broadcast over the domain communicator.

    Parameters
    ----------
    sourceC_nM : array
        Input coefficients.
    targetC_nM : array
        Output coefficients, written in place.
    S_MM, H_MM : array
        The two matrices entering ``S -/+ 0.5j*H*dt``.  In the BLACS
        branch ``H_MM`` is passed to ``scalapack_zero(..., 'U')`` and is
        therefore modified.
    dt : float
        Time step.
    """
    self.timer.start('Linear solve')

    if not self.blacs:
        # Note: The full equation is conjugated (therefore -+, not +-)
        lhs_MM = S_MM - 0.5j * H_MM * dt
        rhs_Mn = np.dot(S_MM + 0.5j * H_MM * dt, sourceC_nM.T.conjugate())
        targetC_nM[:] = solve(lhs_MM, rhs_Mn).T.conjugate()
    else:
        mm_desc = self.mm_block_descriptor
        Cnm_desc = self.Cnm_block_descriptor
        on_gd_master = self.density.gd.comm.rank == 0

        # XXX, Preallocate
        result_nm = Cnm_desc.empty(dtype=complex)
        source_nm = Cnm_desc.empty(dtype=complex)
        work_mm = mm_desc.empty(dtype=complex)

        if not on_gd_master:
            # XXX Fake BLACS nbands, nao, nbands, nao grid because of some
            # weird asserts (these are 0,x or x,0 arrays)
            sourceC_nM = self.CnM_unique_descriptor.zeros(dtype=complex)

        # 1. target = (S+0.5j*H*dt) * source
        # Wave functions to target
        self.CnM2nm.redistribute(sourceC_nM, source_nm)

        # XXX It can't be this hard to symmetrize a matrix (tri2full)
        # Remove upper diagonal of H
        scalapack_zero(mm_desc, H_MM, 'U')
        # Lower diagonal matrix:
        work_mm[:] = S_MM - (0.5j * dt) * H_MM
        # Now it is a strictly lower diagonal matrix
        scalapack_set(mm_desc, work_mm, 0, 0, 'U')
        # Add transpose of H
        pblas_tran(-0.5j * dt, H_MM, 1.0, work_mm, mm_desc, mm_desc)
        # Add transpose of S
        pblas_tran(1.0, S_MM, 1.0, work_mm, mm_desc, mm_desc)

        pblas_simple_gemm(Cnm_desc, mm_desc, Cnm_desc,
                          source_nm, work_mm, result_nm)

        # 2. target = (S-0.5j*H*dt)^-1 * target
        # XXX It can't be this hard to symmetrize a matrix (tri2full)
        # Lower diagonal matrix:
        work_mm[:] = S_MM + (0.5j * dt) * H_MM
        # Now it is a strictly lower diagonal matrix
        scalapack_set(mm_desc, work_mm, 0, 0, 'U')
        # Add transpose of H
        pblas_tran(+0.5j * dt, H_MM, 1.0, work_mm, mm_desc, mm_desc)
        # Add transpose of S
        pblas_tran(1.0, S_MM, 1.0, work_mm, mm_desc, mm_desc)

        scalapack_solve(mm_desc, Cnm_desc, work_mm, result_nm)

        # XXX is this correct?
        # XXX Off the domain master a fake BLACS array is used because of
        # some weird asserts (these are 0,x or x,0 arrays)
        if on_gd_master:
            receiver = targetC_nM
        else:
            receiver = self.CnM_unique_descriptor.zeros(dtype=complex)
        self.Cnm2nM.redistribute(result_nm, receiver)
        self.density.gd.comm.broadcast(targetC_nM, 0)  # Is this required?

    self.timer.stop('Linear solve')
def linear_propagator(self, sourceC_nM, targetC_nM, S_MM, H_MM, dt):
    """Propagate coefficients one step by a linear solve; fill ``targetC_nM``.

    Without BLACS the result is
    ``solve(S - 0.5j*H*dt, (S + 0.5j*H*dt) . source^H)^H``.
    With BLACS the matrix product and the linear solve are carried out on
    block-distributed arrays, then redistributed and broadcast over the
    domain communicator.  When ``self.propagator_debug`` is set, world
    rank 0 additionally computes a dense reference result beforehand and
    passes both to ``verify`` afterwards.

    Parameters
    ----------
    sourceC_nM : array
        Input coefficients.
    targetC_nM : array
        Output coefficients, written in place.
    S_MM, H_MM : array
        The two matrices entering ``S -/+ 0.5j*H*dt``.  In the BLACS
        branch ``H_MM`` is passed to ``scalapack_zero(..., 'U')`` and is
        therefore modified.
    dt : float
        Time step.
    """
    self.timer.start('Linear solve')

    # XXX Debugging stuff. Remove
    if self.propagator_debug:
        if self.blacs:
            # Gather the distributed matrices; rank 0 fills both triangles.
            refH_MM = self.blacs_mm_to_global(H_MM)
            refS_MM = self.blacs_mm_to_global(S_MM)
            if world.rank == 0:
                tri2full(refS_MM, 'L')
                tri2full(refH_MM, 'L')
        else:
            refH_MM = H_MM
            refS_MM = S_MM
        if world.rank == 0:
            U_MM = dot(inv(refS_MM-0.5j*refH_MM*dt),
                       refS_MM+0.5j*refH_MM*dt)
            debugC_nM = dot(sourceC_nM, U_MM.T.conjugate())

    if not self.blacs:
        # Note: The full equation is conjugated (therefore -+, not +-)
        lhs_MM = S_MM-0.5j*H_MM*dt
        rhs_Mn = np.dot(S_MM+0.5j*H_MM*dt, sourceC_nM.T.conjugate())
        targetC_nM[:] = solve(lhs_MM, rhs_Mn).T.conjugate()
    else:
        mm_desc = self.mm_block_descriptor
        Cnm_desc = self.Cnm_block_descriptor
        on_gd_master = self.density.gd.comm.rank == 0

        # XXX, Preallocate
        result_nm = Cnm_desc.empty(dtype=complex)
        source_nm = Cnm_desc.empty(dtype=complex)
        work_mm = mm_desc.empty(dtype=complex)

        if not on_gd_master:
            # XXX Fake BLACS nbands, nao, nbands, nao grid because of some
            # weird asserts (these are 0,x or x,0 arrays)
            sourceC_nM = self.CnM_unique_descriptor.zeros(dtype=complex)

        # 1. target = (S+0.5j*H*dt) * source
        # Wave functions to target
        self.CnM2nm.redistribute(sourceC_nM, source_nm)

        # XXX It can't be this hard to symmetrize a matrix (tri2full)
        # Remove upper diagonal of H
        scalapack_zero(mm_desc, H_MM, 'U')
        # Lower diagonal matrix
        work_mm[:] = S_MM - (0.5j*dt) * H_MM
        # Now it is a strictly lower diagonal matrix
        scalapack_set(mm_desc, work_mm, 0, 0, 'U')
        # Add transpose of H
        pblas_tran(-0.5j*dt, H_MM, 1.0, work_mm, mm_desc, mm_desc)
        # Add transpose of S
        pblas_tran(1.0, S_MM, 1.0, work_mm, mm_desc, mm_desc)

        pblas_simple_gemm(Cnm_desc, mm_desc, Cnm_desc,
                          source_nm, work_mm, result_nm)

        # 2. target = (S-0.5j*H*dt)^-1 * target
        # XXX It can't be this hard to symmetrize a matrix (tri2full)
        # Lower diagonal matrix
        work_mm[:] = S_MM + (0.5j*dt) * H_MM
        # Now it is a strictly lower diagonal matrix
        scalapack_set(mm_desc, work_mm, 0, 0, 'U')
        # Add transpose of H
        pblas_tran(+0.5j*dt, H_MM, 1.0, work_mm, mm_desc, mm_desc)
        # Add transpose of S
        pblas_tran(1.0, S_MM, 1.0, work_mm, mm_desc, mm_desc)

        scalapack_solve(mm_desc, Cnm_desc, work_mm, result_nm)

        # XXX is this correct?
        # XXX Off the domain master a fake BLACS array is used because of
        # some weird asserts (these are 0,x or x,0 arrays)
        if on_gd_master:
            receiver = targetC_nM
        else:
            receiver = self.CnM_unique_descriptor.zeros(dtype=complex)
        self.Cnm2nM.redistribute(result_nm, receiver)
        self.density.gd.comm.broadcast(targetC_nM, 0)  # Is this required?

    # XXX Debugging stuff. Remove
    if self.propagator_debug and world.rank == 0:
        verify(targetC_nM, debugC_nM,
               'Linear solver propagator vs. reference')

    self.timer.stop('Linear solve')