Example #1
    def __init__(self, sl_lrtddft, nrows, lr_comms):
        self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)

        self.lr_comms = lr_comms

        # original grid, i.e., how the matrix is stored
        self.matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                     self.lr_comms.dd_comm.size,
                                     self.lr_comms.eh_comm.size)

        # diagonalization grid
        self.diag_grid = BlacsGrid(self.lr_comms.parent_comm, self.mprocs,
                                   self.nprocs)

        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        #
        # M = rows, N = cols
        M = nrows
        N = nrows
        mb = 1
        nb = 1
        self.matrix_descr = self.matrix_grid.new_descriptor(N, M, nb, mb)

        bs = self.block_size
        self.diag_descr = self.diag_grid.new_descriptor(N, M, bs, bs)

        self.diag_in_redist = Redistributor(self.lr_comms.parent_comm,
                                            self.matrix_descr, self.diag_descr)

        self.diag_out_redist = Redistributor(self.lr_comms.parent_comm,
                                             self.diag_descr,
                                             self.matrix_descr)
Example #2
    def scalapack_diagonalize(self, H_sS):

        mb = 32
        N = self.nS

        g1 = BlacsGrid(world, size, 1)
        g2 = BlacsGrid(world, size // 2, 2)
        nndesc1 = g1.new_descriptor(N, N, self.nS_local, N)
        nndesc2 = g2.new_descriptor(N, N, mb, mb)

        A_ss = nndesc2.empty(dtype=H_sS.dtype)
        redistributor = Redistributor(world, nndesc1, nndesc2)
        redistributor.redistribute(H_sS, A_ss)

        # diagonalize
        v_ss = nndesc2.zeros(dtype=A_ss.dtype)
        w_S = np.zeros(N, dtype=float)
        nndesc2.diagonalize_dc(A_ss, v_ss, w_S, 'L')

        # distribute the eigenvectors to master
        v_sS = np.zeros_like(H_sS)
        redistributor = Redistributor(world, nndesc2, nndesc1)
        redistributor.redistribute(v_ss, v_sS)

        #        v2_SS = np.zeros((self.nS, self.nS), dtype=complex)
        #        world.all_gather(v_sS, v2_SS)

        return w_S, v_sS.conj()
Example #3
File: bse.py  Project: thonmaker/gpaw
    def diagonalize(self):

        print('Diagonalizing Hamiltonian', file=self.fd)
        # t and T represent local and global eigenstate indices, respectively.

        # Non-Hermitian matrix can only use linalg.eig
        if not self.td:
            print('  Using numpy.linalg.eig...', file=self.fd)
            print('  Eliminated %s pair orbitals' % len(self.excludef_S),
                  file=self.fd)

            self.H_SS = self.collect_A_SS(self.H_sS)
            self.w_T = np.zeros(self.nS - len(self.excludef_S), complex)
            if world.rank == 0:
                self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=0)
                self.H_SS = np.delete(self.H_SS, self.excludef_S, axis=1)
                self.w_T, self.v_ST = np.linalg.eig(self.H_SS)
            world.broadcast(self.w_T, 0)
            self.df_S = np.delete(self.df_S, self.excludef_S)
            self.rhoG0_S = np.delete(self.rhoG0_S, self.excludef_S)
        else:
            # Here the eigenvectors are returned as complex-conjugated rows.
            if world.size == 1:
                print('  Using lapack...', file=self.fd)
                from gpaw.utilities.lapack import diagonalize
                self.w_T = np.zeros(self.nS)
                diagonalize(self.H_sS, self.w_T)
                self.v_St = self.H_sS.conj().T
            else:
                print('  Using scalapack...', file=self.fd)
                nS = self.nS
                ns = -(-self.kd.nbzkpts // world.size) * (self.nv * self.nc *
                                                          self.spins *
                                                          (self.spinors + 1)**2)
                grid = BlacsGrid(world, world.size, 1)
                desc = grid.new_descriptor(nS, nS, ns, nS)

                desc2 = grid.new_descriptor(nS, nS, 2, 2)
                H_tmp = desc2.zeros(dtype=complex)
                r = Redistributor(world, desc, desc2)
                r.redistribute(self.H_sS, H_tmp)

                self.w_T = np.empty(nS)
                v_tmp = desc2.empty(dtype=complex)
                desc2.diagonalize_dc(H_tmp, v_tmp, self.w_T)

                r = Redistributor(grid.comm, desc2, desc)
                self.v_St = desc.zeros(dtype=complex)
                r.redistribute(v_tmp, self.v_St)
                self.v_St = self.v_St.conj().T

        if self.write_v and self.td:
            # Cannot use par_save without td
            self.par_save('v_TS.ulm', 'v_TS', self.v_St.T)

        return
Example #4
    def __init__(self,
                 gd,
                 bd,
                 block_comm,
                 dtype,
                 mcpus,
                 ncpus,
                 blocksize,
                 nao,
                 timer=nulltimer):
        BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                              blocksize, timer)
        nbands = bd.nbands
        self.blocksize = blocksize
        self.mynbands = mynbands = bd.mynbands

        self.orbital_comm = self.bd.comm
        self.naoblocksize = naoblocksize = -((-nao) // self.orbital_comm.size)
        self.nao = nao

        # Range of basis functions for BLACS distribution of matrices:
        self.Mmax = nao
        self.Mstart = bd.comm.rank * naoblocksize
        self.Mstop = min(self.Mstart + naoblocksize, self.Mmax)
        self.mynao = self.Mstop - self.Mstart

        # Column layout for one matrix per band rank:
        self.columngrid = BlacsGrid(bd.comm, bd.comm.size, 1)
        self.mMdescriptor = self.columngrid.new_descriptor(
            nao, nao, naoblocksize, nao)
        self.nMdescriptor = self.columngrid.new_descriptor(
            nbands, nao, mynbands, nao)

        #parallelprint(world, (mynao, self.mMdescriptor.shape))

        # Column layout for one matrix in total (only on grid masters):
        self.single_column_grid = BlacsGrid(self.column_comm, bd.comm.size, 1)
        self.mM_unique_descriptor = self.single_column_grid.new_descriptor(
            nao, nao, naoblocksize, nao)

        # nM_unique_descriptor is meant to hold the coefficients after
        # diagonalization.  BLACS requires it to be nao-by-nao, but
        # we only fill meaningful data into the first nbands columns.
        #
        # The array will then be trimmed and broadcast across
        # the grid descriptor's communicator.
        self.nM_unique_descriptor = self.single_column_grid.new_descriptor(
            nbands, nao, mynbands, nao)

        # Fully blocked grid for diagonalization with many CPUs:
        self.mmdescriptor = self.blockgrid.new_descriptor(
            nao, nao, blocksize, blocksize)

        #self.nMdescriptor = nMdescriptor
        self.mM2mm = Redistributor(self.block_comm, self.mM_unique_descriptor,
                                   self.mmdescriptor)
        self.mm2nM = Redistributor(self.block_comm, self.mmdescriptor,
                                   self.nM_unique_descriptor)
Example #5
    def test_overlaps_nonhermitian(self):
        alpha = np.random.normal(size=1).astype(self.dtype)
        if self.dtype == complex:
            alpha += 1j * np.random.normal(size=1)
        world.broadcast(alpha, 0)

        # Set up non-Hermitian overlap operator:
        S = lambda x: alpha * x
        dS = lambda a, P_ni: np.dot(alpha * P_ni, self.setups[a].dO_ii)
        nblocks = self.get_optimal_number_of_blocks(self.blocking)
        # Note: 'async' became a reserved word in Python 3.7, so this
        # Python 2-era attribute would need renaming there.
        overlap = MatrixOperator(self.ksl, nblocks, self.async, False)
        if 0:  #XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
            blockcomm = self.ksl.nndescriptor.blacsgrid.comm
            self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                           self.ksl.nndescriptor)
        S_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S,
                                                 dS)

        if memstats:
            self.mem_test = record_memory()

        S_NN = self.ksl.nndescriptor.collect_on_master(S_nn)
        if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
            assert S_NN.shape == (self.bd.nbands, ) * 2
            S_NN = S_NN.T.copy()  # Fortran -> C indexing
        else:
            assert S_NN.nbytes == 0
            S_NN = np.empty((self.bd.nbands, ) * 2, dtype=S_NN.dtype)

        if self.bd.comm.rank == 0:
            self.gd.comm.broadcast(S_NN, 0)
        self.bd.comm.broadcast(S_NN, 0)

        self.check_and_plot(S_NN, alpha * self.S0_nn, 9,
                            'overlaps,nonhermitian')
Example #6
    def calculate_blocked_density_matrix(self, f_n, C_nM):
        nbands = self.bd.nbands
        nao = self.nao
        dtype = C_nM.dtype

        self.nMdescriptor.checkassert(C_nM)
        if self.gd.rank == 0:
            Cf_nM = (C_nM * f_n[:, None]).conj()
        else:
            C_nM = self.nM_unique_descriptor.zeros(dtype=dtype)
            Cf_nM = self.nM_unique_descriptor.zeros(dtype=dtype)

        r = Redistributor(self.block_comm, self.nM_unique_descriptor,
                          self.mmdescriptor)

        Cf_mm = self.mmdescriptor.zeros(dtype=dtype)
        r.redistribute(Cf_nM, Cf_mm, nbands, nao)
        del Cf_nM

        C_mm = self.mmdescriptor.zeros(dtype=dtype)
        r.redistribute(C_nM, C_mm, nbands, nao)
        # no use to delete C_nM as it's in the input...

        rho_mm = self.mmdescriptor.zeros(dtype=dtype)

        pblas_simple_gemm(self.mmdescriptor,
                          self.mmdescriptor,
                          self.mmdescriptor,
                          Cf_mm,
                          C_mm,
                          rho_mm,
                          transa='T')
        return rho_mm
Example #7
def parallel_eigh(matrixfile, blacsgrid=(4, 2), blocksize=64):
    """Diagonalize matrix in parallel"""
    assert np.prod(blacsgrid) == world.size
    grid = BlacsGrid(world, *blacsgrid)

    if world.rank == MASTER:
        H_MM = np.load(matrixfile)
        assert H_MM.ndim == 2
        assert H_MM.shape[0] == H_MM.shape[1]
        NM = len(H_MM)
    else:
        NM = 0
    NM = world.sum(NM) # Distribute matrix shape to all nodes

    # descriptor for the individual blocks
    block_desc = grid.new_descriptor(NM, NM, blocksize, blocksize)

    # descriptor for global array on MASTER
    local_desc = grid.new_descriptor(NM, NM, NM, NM)

    # Make some dummy array on all the slaves
    if world.rank != MASTER:
        H_MM = local_desc.zeros()
    assert local_desc.check(H_MM)

    # The local version of the matrix
    H_mm = block_desc.empty()

    # Distribute global array to smaller blocks
    redistributor = Redistributor(world, local_desc, block_desc)
    redistributor.redistribute(H_MM, H_mm)

    # Allocate arrays for eigenvalues and -vectors
    eps_M = np.empty(NM)
    C_mm = block_desc.empty()
    block_desc.diagonalize_ex(H_mm, C_mm, eps_M)

    # Collect eigenvectors on MASTER
    C_MM = local_desc.empty()
    redistributor2 = Redistributor(world, block_desc, local_desc)
    redistributor2.redistribute(C_mm, C_MM)

    # Return eigenvalues and -vectors on Master
    if world.rank == MASTER:
        return eps_M, C_MM
    else:
        return None, None
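
A minimal driver for the helper above might look like this (a sketch: it assumes `gpaw.mpi.world`, a `MASTER = 0` constant as in the function, and a launch with eight MPI ranks to match the default 4x2 grid):

    import numpy as np
    from gpaw.mpi import world

    MASTER = 0
    if world.rank == MASTER:
        H = np.random.rand(100, 100)
        np.save('H_MM.npy', H + H.T)  # symmetric test matrix
    world.barrier()  # ensure the file exists before any rank reads it

    eps_M, C_MM = parallel_eigh('H_MM.npy', blacsgrid=(4, 2), blocksize=64)
    if world.rank == MASTER:
        print(eps_M[:5])  # results are only returned on MASTER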
Example #8
    def test_multiply_nonhermitian(self):
        alpha = np.random.normal(size=1).astype(self.dtype)
        if self.dtype == complex:
            alpha += 1j * np.random.normal(size=1)
        world.broadcast(alpha, 0)

        # Known starting point of S_nn = <psit_m|S|psit_n>
        S_NN = alpha * self.S0_nn

        if self.dtype == complex:
            C_NN = (np.random.uniform(size=self.nbands**2) *
                    np.exp(1j * np.random.uniform(0, 2 * np.pi,
                                                  size=self.nbands**2)))
        else:
            C_NN = np.random.normal(size=self.nbands**2)
        C_NN = C_NN.reshape(
            (self.nbands, self.nbands)) / np.linalg.norm(C_NN, 2)
        world.broadcast(C_NN, 0)

        # Set up non-Hermitian overlap operator:
        S = lambda x: alpha * x
        dS = lambda a, P_ni: np.dot(alpha * P_ni, self.setups[a].dO_ii)
        nblocks = self.get_optimal_number_of_blocks(self.blocking)
        # Note: 'async' is reserved in Python >= 3.7 (see the overlap test above).
        overlap = MatrixOperator(self.ksl, nblocks, self.async, False)

        if 0:  #XXX non-hermitian case so Nn2nn not just uplo='L' but rather 'G'
            blockcomm = self.ksl.nndescriptor.blacsgrid.comm
            self.ksl.Nn2nn = Redistributor(blockcomm, self.ksl.Nndescriptor,
                                           self.ksl.nndescriptor)

        if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
            assert C_NN.shape == (self.bd.nbands, ) * 2
            tmp_NN = C_NN.T.copy()  # C -> Fortran indexing
        else:
            tmp_NN = self.ksl.nndescriptor.as_serial().empty(dtype=C_NN.dtype)
        C_nn = self.ksl.nndescriptor.distribute_from_master(tmp_NN)

        self.psit_nG = overlap.matrix_multiply(C_nn, self.psit_nG, self.P_ani)
        D_nn = overlap.calculate_matrix_elements(self.psit_nG, self.P_ani, S,
                                                 dS)

        if memstats:
            self.mem_test = record_memory()

        D_NN = self.ksl.nndescriptor.collect_on_master(D_nn)
        if self.bd.comm.rank == 0 and self.gd.comm.rank == 0:
            assert D_NN.shape == (self.bd.nbands, ) * 2
            D_NN = D_NN.T.copy()  # Fortran -> C indexing
        else:
            assert D_NN.nbytes == 0
            D_NN = np.empty((self.bd.nbands, ) * 2, dtype=D_NN.dtype)

        if self.bd.comm.rank == 0:
            self.gd.comm.broadcast(D_NN, 0)
        self.bd.comm.broadcast(D_NN, 0)

        # D_nn = C_nn^dag * S_nn * C_nn
        D0_NN = np.dot(C_NN.T.conj(), np.dot(S_NN, C_NN))
        self.check_and_plot(D_NN, D0_NN, 9, 'multiply,nonhermitian')
Example #9
def scal_diagonalize(A, nodes='master'):
    # Diagonalize matrix A (size N*N) with scalapack
    # Usage: eps, B = scal_diagonalize(A)
    # eps and B are the eigenvalues and eigenvectors
    # nodes = 'master': eigenvectors only available on master node
    # nodes = 'all': eigenvectors broadcast to all nodes

    # make sure A is N*N, and hermitian
    N = A.shape[0]
    assert A.shape[0] == A.shape[1]
    for i in range(N):
        for j in range(i, N):
            assert A[i, j] == A[j, i].conj()

    # create blacs descriptor
    mb = 64
    g = BlacsGrid(world, 2, size // 2)
    nndesc1 = g.new_descriptor(N, N, N, N)
    nndesc2 = g.new_descriptor(N, N, mb, mb)

    # distribute A to blacs grid A_
    if rank != 0:
        A = nndesc1.zeros(dtype=A.dtype)
    A_ = nndesc2.empty(dtype=A.dtype)
    redistributor = Redistributor(world, nndesc1, nndesc2)
    redistributor.redistribute(A, A_)

    # diagonalize
    B_ = nndesc2.zeros(dtype=A.dtype)
    eps = np.zeros(N)  # eigenvalues of a Hermitian matrix are real
    nndesc2.diagonalize_dc(A_, B_, eps, 'L')

    # distribute the eigenvectors to master
    B = np.zeros_like(A)
    redistributor = Redistributor(world, nndesc2, nndesc1)
    redistributor.redistribute(B_, B)

    if nodes == 'master':
        return eps, B
    elif nodes == 'all':
        if rank != 0:
            B = np.zeros((N, N), dtype=A.dtype)  # match dtype for the broadcast
        world.broadcast(B, 0)
        return eps, B
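
A hedged usage sketch for the routine above (assuming `world` comes from `gpaw.mpi` and the job runs on an even number of ranks so the 2 x size//2 grid is valid; every rank must hold the same Hermitian matrix, since the symmetry check runs everywhere):

    import numpy as np
    from gpaw.mpi import world

    N = 64
    A = np.zeros((N, N))
    if world.rank == 0:
        T = np.random.rand(N, N)
        A[:] = 0.5 * (T + T.T)  # real symmetric, hence Hermitian
    world.broadcast(A, 0)       # all ranks need identical input

    eps, B = scal_diagonalize(A, nodes='all')
    # eps: eigenvalues; B: eigenvectors, broadcast to every rank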
Example #10
    def distribute_to_columns(self, rho_mm, srcdescriptor):
        redistributor = Redistributor(
            self.block_comm,  # XXX
            srcdescriptor,
            self.mM_unique_descriptor)
        rho_mM = redistributor.redistribute(rho_mm)
        if self.gd.rank != 0:
            rho_mM = self.mMdescriptor.zeros(dtype=rho_mm.dtype)
        self.gd.comm.broadcast(rho_mM, 0)
        return rho_mM
Example #11
    def initialize(self, paw, hamiltonian=None):
        LCAOPropagator.initialize(self, paw)
        if hamiltonian is not None:
            self.hamiltonian = hamiltonian

        ksl = self.wfs.ksl
        self.blacs = ksl.using_blacs
        if self.blacs:
            from gpaw.blacs import Redistributor
            self.log('BLACS Parallelization')

            # Parallel grid descriptors
            grid = ksl.blockgrid
            assert grid.nprow * grid.npcol == ksl.block_comm.size
            self.mm_block_descriptor = ksl.mmdescriptor
            self.Cnm_block_descriptor = grid.new_descriptor(ksl.bd.nbands,
                                                            ksl.nao,
                                                            ksl.blocksize,
                                                            ksl.blocksize)
            self.CnM_unique_descriptor = ksl.nM_unique_descriptor

            # Redistributors
            self.Cnm2nM = Redistributor(ksl.block_comm,
                                        self.Cnm_block_descriptor,
                                        self.CnM_unique_descriptor)
            self.CnM2nm = Redistributor(ksl.block_comm,
                                        self.CnM_unique_descriptor,
                                        self.Cnm_block_descriptor)

            if debug:
                nao = ksl.nao
                self.MM_descriptor = grid.new_descriptor(nao, nao, nao, nao)
                self.mm2MM = Redistributor(ksl.block_comm,
                                           self.mm_block_descriptor,
                                           self.MM_descriptor)
                self.MM2mm = Redistributor(ksl.block_comm,
                                           self.MM_descriptor,
                                           self.mm_block_descriptor)

            for kpt in self.wfs.kpt_u:
                scalapack_zero(self.mm_block_descriptor, kpt.S_MM, 'U')
                scalapack_zero(self.mm_block_descriptor, kpt.T_MM, 'U')
Example #12
    def __init__(self,
                 gd,
                 bd,
                 block_comm,
                 dtype,
                 mcpus,
                 ncpus,
                 blocksize,
                 buffer_size=None,
                 timer=nulltimer):
        BlacsLayouts.__init__(self, gd, bd, block_comm, dtype, mcpus, ncpus,
                              blocksize, timer)
        self.buffer_size = buffer_size
        nbands = bd.nbands
        self.mynbands = mynbands = bd.mynbands
        self.blocksize = blocksize

        # 1D layout - columns
        self.columngrid = BlacsGrid(self.column_comm, 1, bd.comm.size)
        self.Nndescriptor = self.columngrid.new_descriptor(
            nbands, nbands, nbands, mynbands)

        # 2D layout
        self.nndescriptor = self.blockgrid.new_descriptor(
            nbands, nbands, blocksize, blocksize)

        # 1D layout - rows
        self.rowgrid = BlacsGrid(self.column_comm, bd.comm.size, 1)
        self.nNdescriptor = self.rowgrid.new_descriptor(
            nbands, nbands, mynbands, nbands)

        # Only redistribute the filled-out half for Hermitian matrices
        self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
                                   self.nndescriptor)
        #self.Nn2nn = Redistributor(self.block_comm, self.Nndescriptor,
        #                           self.nndescriptor, 'L') #XXX faster but...

        # The resulting matrix will be used in dgemm, which is symmetry oblivious
        self.nn2nN = Redistributor(self.block_comm, self.nndescriptor,
                                   self.nNdescriptor)
Example #13
    def redistribute_H(self, H_sS):

        g1 = BlacsGrid(world, size, 1)
        g2 = BlacsGrid(world, 1, size)
        N = self.nS
        nndesc1 = g1.new_descriptor(N, N, self.nS_local, N)
        nndesc2 = g2.new_descriptor(N, N, N, self.nS_local)

        H_Ss = nndesc2.empty(dtype=H_sS.dtype)
        redistributor = Redistributor(world, nndesc1, nndesc2)
        redistributor.redistribute(H_sS, H_Ss)

        return H_Ss
Example #14
def distribute_MM(wfs, a_MM):
    ksl = wfs.ksl
    if not ksl.using_blacs:
        return a_MM

    dtype = a_MM.dtype
    ksl_comm = ksl.block_comm
    NM = ksl.nao
    grid = BlacsGrid(ksl_comm, 1, 1)
    MM_descriptor = grid.new_descriptor(NM, NM, NM, NM)
    MM2mm = Redistributor(ksl_comm, MM_descriptor, ksl.mmdescriptor)
    if ksl_comm.rank != 0:
        a_MM = MM_descriptor.empty(dtype=dtype)

    a_mm = ksl.mmdescriptor.empty(dtype=dtype)
    MM2mm.redistribute(a_MM, a_mm)
    return a_mm
Example #15
    def redistribute(self, in_wGG, out_x=None):
        """Redistribute array.
        
        Switch between two kinds of parallel distributions:
            
        1) parallel over G-vectors (second dimension of in_wGG)
        2) parallel over frequency (first dimension of in_wGG)

        Returns new array using the memory in the 1-d array out_x.
        """

        comm = self.blockcomm

        if comm.size == 1:
            return in_wGG

        nw = len(self.omega_w)
        nG = in_wGG.shape[2]
        mynw = (nw + comm.size - 1) // comm.size
        mynG = (nG + comm.size - 1) // comm.size

        bg1 = BlacsGrid(comm, comm.size, 1)
        bg2 = BlacsGrid(comm, 1, comm.size)
        md1 = BlacsDescriptor(bg1, nw, nG**2, mynw, nG**2)
        md2 = BlacsDescriptor(bg2, nw, nG**2, nw, mynG * nG)

        if len(in_wGG) == nw:
            mdin = md2
            mdout = md1
        else:
            mdin = md1
            mdout = md2

        r = Redistributor(comm, mdin, mdout)

        outshape = (mdout.shape[0], mdout.shape[1] // nG, nG)
        if out_x is None:
            out_wGG = np.empty(outshape, complex)
        else:
            out_wGG = out_x[:np.prod(outshape)].reshape(outshape)

        r.redistribute(in_wGG.reshape(mdin.shape),
                       out_wGG.reshape(mdout.shape))

        return out_wGG
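
The two distributions described in the docstring can be pictured with plain numpy slicing: distribution 2 gives each rank a contiguous slab of frequencies (all G-vectors), distribution 1 a contiguous slab of G rows (all frequencies). A serial illustration of what rank `r` would hold in each layout (a sketch; the method above moves the data with BLACS instead of gathering it):

    import numpy as np

    nw, nG, size, r = 8, 6, 2, 0     # toy sizes and one rank index
    full_wGG = np.arange(nw * nG * nG, dtype=complex).reshape(nw, nG, nG)

    mynw = (nw + size - 1) // size   # ceiling division, as in the method
    mynG = (nG + size - 1) // size

    slab_w = full_wGG[r * mynw:(r + 1) * mynw]     # parallel over frequency
    slab_G = full_wGG[:, r * mynG:(r + 1) * mynG]  # parallel over G-vectors
    print(slab_w.shape, slab_G.shape)  # (4, 6, 6) and (8, 3, 6)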
Example #16
def test(comm, M, N, mcpus, ncpus, mb, nb):
    grid0 = BlacsGrid(comm, 1, 1)
    desc0 = grid0.new_descriptor(M, N, M, N, 0, 0)
    A_mn = desc0.zeros(dtype=float)
    A_mn[:] = comm.size + 1

    grid1 = BlacsGrid(comm, mcpus, ncpus)
    desc1 = grid1.new_descriptor(M, N, mb, nb, 0, 0)  # ???
    B_mn = desc1.zeros(dtype=float)
    B_mn[:] = comm.rank

    if comm.rank == 0:
        msg = 'Slices of global matrix indices by rank'
        print(msg)
        print('-' * len(msg))

    for rank in range(comm.size):
        comm.barrier()
        if rank == comm.rank:
            print('Rank %d:' % rank)
            last_Mstart = -1
            for Mstart, Mstop, Nstart, Nstop, block in desc1.my_blocks(B_mn):
                if Mstart > last_Mstart and last_Mstart >= 0:
                    print()
                print('[%3d:%3d, %3d:%3d]' % (Mstart, Mstop, Nstart, Nstop),
                      end=' ')
                last_Mstart = Mstart
                assert (block == comm.rank).all()
                #print block
                #print
            print()
            print()
        comm.barrier()

    redistributor = Redistributor(comm, desc1, desc0)
    redistributor.redistribute(B_mn, A_mn)

    if comm.rank == 0:
        msg = 'Rank where each element of the global matrix is stored'
        print(msg)
        print('-' * len(msg))
        print(A_mn)
Example #17
def collect_wuMM(wfs, a_wuMM, w, s, k):
    # This function is based on
    # gpaw/wavefunctions/base.py: WaveFunctions.collect_auxiliary()

    dtype = a_wuMM[0][0].dtype

    ksl = wfs.ksl
    NM = ksl.nao
    kpt_rank, u = wfs.kd.get_rank_and_index(s, k)

    ksl_comm = ksl.block_comm

    if wfs.kd.comm.rank == kpt_rank:
        a_MM = a_wuMM[w][u]

        # Collect within blacs grid
        if ksl.using_blacs:
            a_mm = a_MM
            grid = BlacsGrid(ksl_comm, 1, 1)
            MM_descriptor = grid.new_descriptor(NM, NM, NM, NM)
            mm2MM = Redistributor(ksl_comm, ksl.mmdescriptor, MM_descriptor)

            a_MM = MM_descriptor.empty(dtype=dtype)
            mm2MM.redistribute(a_mm, a_MM)

        # The KSL master sends a_MM to the global master
        if ksl_comm.rank == 0:
            if kpt_rank == 0:
                assert wfs.world.rank == 0
                # I have it already
                return a_MM
            else:
                wfs.kd.comm.send(a_MM, 0, 2017)
                return None
    elif ksl_comm.rank == 0 and kpt_rank != 0:
        assert wfs.world.rank == 0
        a_MM = np.empty((NM, NM), dtype=dtype)
        wfs.kd.comm.receive(a_MM, kpt_rank, 2017)
        return a_MM
Example #18
    def calculate_density_matrix(self, f_n, C_nM, rho_mM=None):
        """Calculate density matrix from occupations and coefficients.

        Presently this function performs the usual scalapack 3-step trick:
        redistribute-numbercrunching-backdistribute.
        
        
        Notes on future performance improvement.
        
        As per the current framework, C_nM exists as copies on each
        domain, i.e. this is not parallel over domains.  We'd like to
        correct this and have an efficient distribution using e.g. the
        block communicator.

        The diagonalization routine and other parts of the code should
        however be changed to accommodate the following scheme:
        
        Keep coefficients in C_mm form after the diagonalization.
        rho_mm can then be directly calculated from C_mm without
        redistribution, after which we only need to redistribute
        rho_mm across domains.
        
        """
        dtype = C_nM.dtype
        rho_mm = self.calculate_blocked_density_matrix(f_n, C_nM)
        rback = Redistributor(self.block_comm, self.mmdescriptor,
                              self.mM_unique_descriptor)
        rho1_mM = self.mM_unique_descriptor.zeros(dtype=dtype)
        rback.redistribute(rho_mm, rho1_mM)
        del rho_mm

        if rho_mM is None:
            if self.gd.rank == 0:
                rho_mM = rho1_mM
            else:
                rho_mM = self.mMdescriptor.zeros(dtype=dtype)

        self.gd.comm.broadcast(rho_mM, 0)
        return rho_mM
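
The "three-step trick" named in the docstring is the pattern running through most of these examples. Stripped of the GPAW layout machinery it reduces to roughly the following (a sketch, assuming `world` from `gpaw.mpi` and a Hermitian N x N matrix held on rank 0):

    import numpy as np
    from gpaw.blacs import BlacsGrid, Redistributor
    from gpaw.mpi import world

    def three_step_diagonalize(A, N, blocksize=64):
        grid = BlacsGrid(world, world.size, 1)
        serial_desc = grid.new_descriptor(N, N, N, N)  # whole matrix on master
        block_desc = grid.new_descriptor(N, N, blocksize, blocksize)

        if world.rank != 0:
            A = serial_desc.zeros(dtype=A.dtype)  # dummy array on other ranks

        # Step 1: redistribute into the blocked layout.
        A_mm = block_desc.empty(dtype=A.dtype)
        Redistributor(world, serial_desc, block_desc).redistribute(A, A_mm)

        # Step 2: number crunching on the blocked layout.
        C_mm = block_desc.zeros(dtype=A.dtype)
        eps_N = np.zeros(N)
        block_desc.diagonalize_dc(A_mm, C_mm, eps_N, 'L')

        # Step 3: redistribute the result back; C is meaningful on rank 0.
        C = serial_desc.zeros(dtype=A.dtype)
        Redistributor(world, block_desc, serial_desc).redistribute(C_mm, C)
        return eps_N, C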
Example #19
    def distribute_frequencies(self, chi0_wGG):
        """Distribute frequencies to all cores."""

        world = self.world
        comm = self.blockcomm

        if world.size == 1:
            return chi0_wGG

        nw = len(self.omega_w)
        nG = chi0_wGG.shape[2]
        mynw = (nw + world.size - 1) // world.size
        mynG = (nG + comm.size - 1) // comm.size

        wa = min(world.rank * mynw, nw)
        wb = min(wa + mynw, nw)

        if self.blockcomm.size == 1:
            return chi0_wGG[wa:wb].copy()

        if self.kncomm.rank == 0:
            bg1 = BlacsGrid(comm, 1, comm.size)
            in_wGG = chi0_wGG.reshape((nw, -1))
        else:
            bg1 = DryRunBlacsGrid(mpi.serial_comm, 1, 1)
            in_wGG = np.zeros((0, 0), complex)
        md1 = BlacsDescriptor(bg1, nw, nG**2, nw, mynG * nG)

        bg2 = BlacsGrid(world, world.size, 1)
        md2 = BlacsDescriptor(bg2, nw, nG**2, mynw, nG**2)

        r = Redistributor(world, md1, md2)
        shape = (wb - wa, nG, nG)
        out_wGG = np.empty(shape, complex)
        r.redistribute(in_wGG, out_wGG.reshape((wb - wa, nG**2)))

        return out_wGG
Example #20
    def tddft_init(self):
        if not self.tddft_initialized:
            if world.rank == 0:
                print('Initializing real time LCAO TD-DFT calculation.')
                print('XXX Warning: Array use not optimal for memory.')
                print('XXX Taylor propagator probably doesn\'t work')
                print('XXX ...and no arrays are listed in memory estimate yet.')
            self.blacs = self.wfs.ksl.using_blacs
            if self.blacs:
                self.ksl = ksl = self.wfs.ksl    
                nao = ksl.nao
                nbands = ksl.bd.nbands
                mynbands = ksl.bd.mynbands
                blocksize = ksl.blocksize

                from gpaw.blacs import Redistributor
                if world.rank == 0:
                    print('BLACS Parallelization')

                # Parallel grid descriptors
                self.MM_descriptor = ksl.blockgrid.new_descriptor(
                    nao, nao, nao, nao)  # FOR DEBUG
                self.mm_block_descriptor = ksl.blockgrid.new_descriptor(
                    nao, nao, blocksize, blocksize)
                self.Cnm_block_descriptor = ksl.blockgrid.new_descriptor(
                    nbands, nao, blocksize, blocksize)
                # self.CnM_descriptor = ksl.blockgrid.new_descriptor(
                #     nbands, nao, mynbands, nao)
                self.mM_column_descriptor = ksl.single_column_grid.new_descriptor(
                    nao, nao, ksl.naoblocksize, nao)
                self.CnM_unique_descriptor = ksl.single_column_grid.new_descriptor(
                    nbands, nao, mynbands, nao)

                # Redistributors
                self.mm2MM = Redistributor(ksl.block_comm,
                                           self.mm_block_descriptor,
                                           self.MM_descriptor) # XXX FOR DEBUG
                self.MM2mm = Redistributor(ksl.block_comm,
                                           self.MM_descriptor,
                                           self.mm_block_descriptor) # XXX FOR DEBUG
                self.Cnm2nM = Redistributor(ksl.block_comm,
                                            self.Cnm_block_descriptor,
                                            self.CnM_unique_descriptor) 
                self.CnM2nm = Redistributor(ksl.block_comm,
                                            self.CnM_unique_descriptor,
                                            self.Cnm_block_descriptor) 
                self.mM2mm = Redistributor(ksl.block_comm,
                                           self.mM_column_descriptor,
                                           self.mm_block_descriptor)

                for kpt in self.wfs.kpt_u:
                    scalapack_zero(self.mm_block_descriptor, kpt.S_MM, 'U')
                    scalapack_zero(self.mm_block_descriptor, kpt.T_MM, 'U')

                # XXX to propagator class
                if self.propagator == 'taylor' and self.blacs:  
                    # cholS_mm = self.mm_block_descriptor.empty(dtype=complex)
                    for kpt in self.wfs.kpt_u:
                        kpt.invS_MM = kpt.S_MM.copy()
                        scalapack_inverse(self.mm_block_descriptor,
                                          kpt.invS_MM, 'L')
                    if self.propagator_debug:
                        if world.rank == 0:
                            print('XXX Doing serial inversion of overlap matrix.')
                        self.timer.start('Invert overlap (serial)')
                        invS2_MM = self.MM_descriptor.empty(dtype=complex)
                        for kpt in self.wfs.kpt_u:
                            #kpt.S_MM[:] = 128.0*(2**world.rank)
                            self.mm2MM.redistribute(self.wfs.S_qMM[kpt.q], invS2_MM)
                            world.barrier()
                            if world.rank == 0:
                                tri2full(invS2_MM,'L')
                                invS2_MM[:] = inv(invS2_MM.copy())
                                self.invS2_MM = invS2_MM
                            kpt.invS2_MM = ksl.mmdescriptor.empty(dtype=complex)
                            self.MM2mm.redistribute(invS2_MM, kpt.invS2_MM)
                            verify(kpt.invS_MM, kpt.invS2_MM, 'overlap par. vs. serial', 'L')
                        self.timer.stop('Invert overlap (serial)')
                        if world.rank == 0:
                            print('XXX Overlap inverted.')
                if self.propagator == 'taylor' and not self.blacs:
                    tmp = inv(self.wfs.kpt_u[0].S_MM)
                    self.wfs.kpt_u[0].invS = tmp

            # Reset the density mixer
            self.density.mixer = DummyMixer()    
            self.tddft_initialized = True
            for k, kpt in enumerate(self.wfs.kpt_u):
                kpt.C2_nM = kpt.C_nM.copy()
Example #21
def main(M=160, N=120, K=140, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if dtype == complex:
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    globA = grid.new_descriptor(M, K, M, K)
    globB = grid.new_descriptor(K, N, K, N)
    globC = grid.new_descriptor(M, N, M, N)
    globZ = grid.new_descriptor(K, K, K, K)
    globX = grid.new_descriptor(K, 1, K, 1)
    globY = grid.new_descriptor(M, 1, M, 1)
    globD = grid.new_descriptor(M, K, M, K)
    globS = grid.new_descriptor(M, M, M, M)
    globU = grid.new_descriptor(M, M, M, M)

    globHEC = grid.new_descriptor(K, K, K, K)

    # print globA.asarray()
    # Populate matrices local to master:
    A0 = gen.rand(*globA.shape) + epsilon * gen.rand(*globA.shape)
    B0 = gen.rand(*globB.shape) + epsilon * gen.rand(*globB.shape)
    D0 = gen.rand(*globD.shape) + epsilon * gen.rand(*globD.shape)
    X0 = gen.rand(*globX.shape) + epsilon * gen.rand(*globX.shape)

    # HEC = HEA * B
    HEA0 = gen.rand(*globHEC.shape) + epsilon * gen.rand(*globHEC.shape)
    if world.rank == 0:
        HEA0 = HEA0 + HEA0.T.conjugate()  # Make HEA0 Hermitian
        HEA0 = np.ascontiguousarray(HEA0)

    # Local result matrices
    Y0 = globY.empty(dtype=dtype)
    C0 = globC.zeros(dtype=dtype)
    Z0 = globZ.zeros(dtype=dtype)
    S0 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U0 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC0 = globB.zeros(dtype=dtype)

    # Local reference matrix product:
    if rank == 0:
        # C0[:] = np.dot(A0, B0)
        gemm(1.0, B0, A0, 0.0, C0)
        # gemm(1.0, A0, A0, 0.0, Z0, transa='t')
        print(A0.shape, Z0.shape)
        Z0[:] = np.dot(A0.T, A0)
        # Y0[:] = np.dot(A0, X0)
        gemv(1.0, A0, X0.ravel(), 0.0, Y0.ravel())
        r2k(1.0, A0, D0, 0.0, S0)
        rk(1.0, A0, 0.0, U0)

        HEC0[:] = np.dot(HEA0, B0)
        sM, sN = HEA0.shape
        # We don't use the upper triangle
        for i in range(sM):
            for j in range(sN):
                if i < j:
                    HEA0[i][j] = 99999.0
        if world.rank == 0:
            print(HEA0)
    assert globA.check(A0) and globB.check(B0) and globC.check(C0)
    assert globX.check(X0) and globY.check(Y0)
    assert globD.check(D0) and globS.check(S0) and globU.check(U0)

    # Create distributed descriptors with various block sizes:
    distA = grid.new_descriptor(M, K, 2, 2)
    distB = grid.new_descriptor(K, N, 2, 4)
    distC = grid.new_descriptor(M, N, 3, 2)
    distZ = grid.new_descriptor(K, K, 5, 7)
    distX = grid.new_descriptor(K, 1, 4, 1)
    distY = grid.new_descriptor(M, 1, 3, 1)
    distD = grid.new_descriptor(M, K, 2, 3)
    distS = grid.new_descriptor(M, M, 2, 2)
    distU = grid.new_descriptor(M, M, 2, 2)
    distHE = grid.new_descriptor(K, K, 2, 4)

    # Distributed matrices:
    A = distA.empty(dtype=dtype)
    B = distB.empty(dtype=dtype)
    C = distC.empty(dtype=dtype)
    Z = distZ.empty(dtype=dtype)
    X = distX.empty(dtype=dtype)
    Y = distY.empty(dtype=dtype)
    D = distD.empty(dtype=dtype)
    S = distS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U = distU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC = distB.zeros(dtype=dtype)
    HEA = distHE.zeros(dtype=dtype)
    Redistributor(world, globA, distA).redistribute(A0, A)
    Redistributor(world, globB, distB).redistribute(B0, B)
    Redistributor(world, globX, distX).redistribute(X0, X)
    Redistributor(world, globD, distD).redistribute(D0, D)
    Redistributor(world, globHEC, distHE).redistribute(HEA0, HEA)

    pblas_simple_gemm(distA, distB, distC, A, B, C)
    pblas_simple_gemm(distA, distA, distZ, A, A, Z, transa='T')
    pblas_simple_gemv(distA, distX, distY, A, X, Y)
    pblas_simple_r2k(distA, distD, distS, A, D, S)
    pblas_simple_rk(distA, distU, A, U)
    pblas_simple_hemm(distHE, distB, distB, HEA, B, HEC, uplo='L', side='L')

    # Collect result back on master
    C1 = globC.empty(dtype=dtype)
    Y1 = globY.empty(dtype=dtype)
    S1 = globS.zeros(dtype=dtype)  # zeros needed for rank-updates
    U1 = globU.zeros(dtype=dtype)  # zeros needed for rank-updates
    HEC1 = globB.zeros(dtype=dtype)
    Redistributor(world, distC, globC).redistribute(C, C1)
    Redistributor(world, distY, globY).redistribute(Y, Y1)
    Redistributor(world, distS, globS).redistribute(S, S1)
    Redistributor(world, distU, globU).redistribute(U, U1)
    Redistributor(world, distB, globB).redistribute(HEC, HEC1)

    if rank == 0:
        gemm_err = abs(C1 - C0).max()
        gemv_err = abs(Y1 - Y0).max()
        r2k_err = abs(S1 - S0).max()
        rk_err = abs(U1 - U0).max()
        hemm_err = abs(HEC1 - HEC0).max()
        print('gemm err', gemm_err)
        print('gemv err', gemv_err)
        print('r2k err', r2k_err)
        print('rk_err', rk_err)
        print('hemm_err', hemm_err)
    else:
        gemm_err = 0.0
        gemv_err = 0.0
        r2k_err = 0.0
        rk_err = 0.0
        hemm_err = 0.0

    gemm_err = world.sum(gemm_err)  # We don't like exceptions on only one cpu
    gemv_err = world.sum(gemv_err)
    r2k_err = world.sum(r2k_err)
    rk_err = world.sum(rk_err)
    hemm_err = world.sum(hemm_err)

    equal(gemm_err, 0, tol)
    equal(gemv_err, 0, tol)
    equal(r2k_err, 0, tol)
    equal(rk_err, 0, tol)
    equal(hemm_err, 0, tol)
Example #22
    def calculate_rkernel(self):

        gd = self.gd
        ng_c = gd.N_c
        cell_cv = gd.cell_cv
        icell_cv = 2 * np.pi * np.linalg.inv(cell_cv)
        vol = np.linalg.det(cell_cv)

        ns = self.calc.wfs.nspins
        n_g = self.n_g  # density on rough grid

        fx_g = ns * self.get_fxc_g(n_g)  # local exchange kernel
        qc_g = (-4 * np.pi * ns / fx_g)**0.5  # cutoff functional
        flocal_g = qc_g**3 * fx_g / (6 * np.pi**2)  # ren. x-kernel for r=r'
        Vlocal_g = 2 * qc_g / np.pi  # ren. Hartree kernel for r=r'

        ng = np.prod(ng_c)  # number of grid points
        r_vg = gd.get_grid_point_coordinates()
        rx_g = r_vg[0].flatten()
        ry_g = r_vg[1].flatten()
        rz_g = r_vg[2].flatten()

        prnt('    %d grid points and %d plane waves at the Gamma point' %
             (ng, self.pd.ngmax),
             file=self.fd)

        # Unit cells
        R_Rv = []
        weight_R = []
        nR_v = self.unit_cells
        nR = np.prod(nR_v)
        for i in range(-nR_v[0] + 1, nR_v[0]):
            for j in range(-nR_v[1] + 1, nR_v[1]):
                for h in range(-nR_v[2] + 1, nR_v[2]):
                    R_Rv.append(i * cell_cv[0] + j * cell_cv[1] +
                                h * cell_cv[2])
                    weight_R.append((nR_v[0] - abs(i)) * (nR_v[1] - abs(j)) *
                                    (nR_v[2] - abs(h)) / float(nR))
        if nR > 1:
            # With more than one unit cell, only the exchange kernel is
            # calculated on the grid; the bare Coulomb kernel is added
            # in the PW basis, so Vlocal_g keeps only the exchange part.
            dv = self.calc.density.gd.dv
            gc = (3 * dv / 4 / np.pi)**(1 / 3.)
            Vlocal_g -= 2 * np.pi * gc**2 / dv
            prnt('    Lattice point sampling: ' + '(%s x %s x %s)^2 ' %
                 (nR_v[0], nR_v[1], nR_v[2]) +
                 ' Reduced to %s lattice points' % len(R_Rv),
                 file=self.fd)

        l_g_size = -(-ng // mpi.world.size)
        l_g_range = range(mpi.world.rank * l_g_size,
                          min((mpi.world.rank + 1) * l_g_size, ng))

        fhxc_qsGr = {}
        for iq in range(len(self.ibzq_qc)):
            fhxc_qsGr[iq] = np.zeros(
                (ns, len(self.pd.G2_qG[iq]), len(l_g_range)), dtype=complex)

        inv_error = np.seterr()
        np.seterr(invalid='ignore')
        np.seterr(divide='ignore')

        t0 = time()
        # Loop over Lattice points
        for i, R_v in enumerate(R_Rv):
            # Loop over r'. f_rr and V_rr are functions of r (dim. as r_vg[0])
            if i == 1:
                prnt('      Finished 1 cell in %s seconds' % int(time() - t0) +
                     ' - estimated %s seconds left' % int(
                         (len(R_Rv) - 1) * (time() - t0)),
                     file=self.fd)
                self.fd.flush()
            if len(R_Rv) > 5:
                if (i + 1) % (len(R_Rv) // 5 + 1) == 0:
                    prnt('      Finished %s cells in %s seconds' %
                         (i, int(time() - t0)) +
                         ' - estimated %s seconds left' % int(
                             (len(R_Rv) - i) * (time() - t0) / i),
                         file=self.fd)
                    self.fd.flush()
            for g in l_g_range:
                rx = rx_g[g] + R_v[0]
                ry = ry_g[g] + R_v[1]
                rz = rz_g[g] + R_v[2]

                # |r-r'-R_i|
                rr = ((r_vg[0] - rx)**2 + (r_vg[1] - ry)**2 +
                      (r_vg[2] - rz)**2)**0.5

                n_av = (n_g + n_g.flatten()[g]) / 2.
                fx_g = ns * self.get_fxc_g(n_av, index=g)
                qc_g = (-4 * np.pi * ns / fx_g)**0.5
                x = qc_g * rr
                osc_x = np.sin(x) - x * np.cos(x)
                f_rr = fx_g * osc_x / (2 * np.pi**2 * rr**3)
                if nR > 1:  # include only exchange part of the kernel here
                    V_rr = (sici(x)[0] * 2 / np.pi - 1) / rr
                else:  # include the full kernel (also hartree part)
                    V_rr = (sici(x)[0] * 2 / np.pi) / rr

                # Terms with r = r'
                if (np.abs(R_v) < 0.001).all():
                    tmp_flat = f_rr.flatten()
                    tmp_flat[g] = flocal_g.flatten()[g]
                    f_rr = tmp_flat.reshape(ng_c)
                    tmp_flat = V_rr.flatten()
                    tmp_flat[g] = Vlocal_g.flatten()[g]
                    V_rr = tmp_flat.reshape(ng_c)
                    del tmp_flat

                f_rr[np.where(n_av < self.density_cut)] = 0.0
                V_rr[np.where(n_av < self.density_cut)] = 0.0

                f_rr *= weight_R[i]
                V_rr *= weight_R[i]

                # r-r'-R_i
                r_r = np.array([r_vg[0] - rx, r_vg[1] - ry, r_vg[2] - rz])

                # Fourier transform of r
                for iq, q in enumerate(self.ibzq_qc):
                    q_v = np.dot(q, icell_cv)
                    e_q = np.exp(-1j * gemmdot(q_v, r_r, beta=0.0))
                    f_q = self.pd.fft((f_rr + V_rr) * e_q, iq) * vol / ng
                    fhxc_qsGr[iq][0, :, g - l_g_range[0]] += f_q
                    if ns == 2:
                        f_q = self.pd.fft(V_rr * e_q, iq) * vol / ng
                        fhxc_qsGr[iq][1, :, g - l_g_range[0]] += f_q

        mpi.world.barrier()

        np.seterr(**inv_error)

        for iq, q in enumerate(self.ibzq_qc):
            npw = len(self.pd.G2_qG[iq])
            fhxc_sGsG = np.zeros((ns * npw, ns * npw), complex)
            l_pw_size = -(-npw // mpi.world.size)  # parallelize over PW below
            l_pw_range = range(mpi.world.rank * l_pw_size,
                               min((mpi.world.rank + 1) * l_pw_size, npw))

            if mpi.world.size > 1:
                # redistribute grid and plane waves in fhxc_qsGr[iq]
                bg1 = BlacsGrid(mpi.world, 1, mpi.world.size)
                bg2 = BlacsGrid(mpi.world, mpi.world.size, 1)
                bd1 = bg1.new_descriptor(npw, ng, npw, -(-ng // mpi.world.size))
                bd2 = bg2.new_descriptor(npw, ng, -(-npw // mpi.world.size), ng)

                fhxc_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)
                if ns == 2:
                    Koff_Glr = np.zeros((len(l_pw_range), ng), dtype=complex)

                r = Redistributor(bg1.comm, bd1, bd2)
                r.redistribute(fhxc_qsGr[iq][0], fhxc_Glr, npw, ng)
                if ns == 2:
                    r.redistribute(fhxc_qsGr[iq][1], Koff_Glr, npw, ng)
            else:
                fhxc_Glr = fhxc_qsGr[iq][0]
                if ns == 2:
                    Koff_Glr = fhxc_qsGr[iq][1]

            # Fourier transform of r'
            for iG in range(len(l_pw_range)):
                f_g = fhxc_Glr[iG].reshape(ng_c)
                f_G = self.pd.fft(f_g.conj(), iq) * vol / ng
                fhxc_sGsG[l_pw_range[0] + iG, :npw] = f_G.conj()
                if ns == 2:
                    v_g = Koff_Glr[iG].reshape(ng_c)
                    v_G = self.pd.fft(v_g.conj(), iq) * vol / ng
                    fhxc_sGsG[npw + l_pw_range[0] + iG, :npw] = v_G.conj()

            if ns == 2:  # f_00 = f_11 and f_01 = f_10
                fhxc_sGsG[:npw, npw:] = fhxc_sGsG[npw:, :npw]
                fhxc_sGsG[npw:, npw:] = fhxc_sGsG[:npw, :npw]

            mpi.world.sum(fhxc_sGsG)
            fhxc_sGsG /= vol

            if mpi.rank == 0:
                w = Writer('fhxc_%s_%s_%s_%s.gpw' %
                           (self.tag, self.xc, self.ecut, iq))
                w.dimension('sG', ns * npw)
                w.add('fhxc_sGsG', ('sG', 'sG'), dtype=complex)
                if nR > 1:  # add Hartree kernel evaluated in PW basis
                    Gq2_G = self.pd.G2_qG[iq]
                    if (q == 0).all():
                        Gq2_G[0] = 1.
                    vq_G = 4 * np.pi / Gq2_G
                    fhxc_sGsG += np.tile(np.eye(npw) * vq_G, (ns, ns))
                w.fill(fhxc_sGsG)
                w.close()
            mpi.world.barrier()
        prnt(file=self.fd)
Example #23
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float):
    gen = np.random.RandomState(seed)
    grid = BlacsGrid(world, mprocs, nprocs)

    if (dtype == complex):
        epsilon = 1.0j
    else:
        epsilon = 0.0

    # Create descriptors for matrices on master:
    glob = grid.new_descriptor(N, N, N, N)

    # print globA.asarray()
    # Populate matrices local to master:
    H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape)
    C0 = glob.empty(dtype=dtype)
    if rank == 0:
        # Complex case must have real numbers on the diagonal.
        # We make a simple complex Hermitian matrix below.
        H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) +
                             0.3 * np.tri(N, N, k=-1))
        S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) +
                             0.4 * np.tri(N, N, k=-1))
        # Make matrices symmetric
        rk(1.0, H0.copy(), 0.0, H0)
        rk(1.0, S0.copy(), 0.0, S0)
        # Overlap matrix must be semi-positive definite
        S0 = S0 + 50.0 * np.eye(N, N, 0)
        # Hamiltonian is usually diagonally dominant
        H0 = H0 + 75.0 * np.eye(N, N, 0)
        C0 = S0.copy()
        S0_inv = S0.copy()

    # Local result matrices
    W0 = np.empty((N), dtype=float)
    W0_g = np.empty((N), dtype=float)

    # Calculate eigenvalues / other serial results
    if rank == 0:
        diagonalize(H0.copy(), W0)
        general_diagonalize(H0.copy(), W0_g, S0.copy())
        inverse_cholesky(C0)  # result returned in lower triangle
        tri2full(S0_inv, 'L')
        S0_inv = inv(S0_inv)
        # tri2full(C0) # symmetrize

    assert glob.check(H0) and glob.check(S0) and glob.check(C0)

    # Create distributed descriptors with various block sizes:
    dist = grid.new_descriptor(N, N, 8, 8)

    # Distributed matrices:
    # We can use empty here, but we end up with garbage on the other
    # half of the triangle when we redistribute.  This is fine because
    # ScaLAPACK does not care.

    H = dist.empty(dtype=dtype)
    S = dist.empty(dtype=dtype)
    Z = dist.empty(dtype=dtype)
    C = dist.empty(dtype=dtype)
    Sinv = dist.empty(dtype=dtype)

    # Eigenvalues are non-BLACS matrices
    W = np.empty((N), dtype=float)
    W_dc = np.empty((N), dtype=float)
    W_mr3 = np.empty((N), dtype=float)
    W_g = np.empty((N), dtype=float)
    W_g_dc = np.empty((N), dtype=float)
    W_g_mr3 = np.empty((N), dtype=float)

    Glob2dist = Redistributor(world, glob, dist)
    Glob2dist.redistribute(H0, H, uplo='L')
    Glob2dist.redistribute(S0, S, uplo='L')
    Glob2dist.redistribute(S0, C, uplo='L')  # C0 was previously overwritten
    Glob2dist.redistribute(S0, Sinv, uplo='L')

    # we don't test the expert drivers anymore since there
    # might be a buffer overflow error
    ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L')
    scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L')
    ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L')
    ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L')
    scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L')
    ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L')

    scalapack_inverse_cholesky(dist, C, 'L')

    if dtype == complex:  # Only supported for complex for now
        scalapack_inverse(dist, Sinv, 'L')
    # Undo redistribute
    C_test = glob.empty(dtype=dtype)
    Sinv_test = glob.empty(dtype=dtype)
    Dist2glob = Redistributor(world, dist, glob)
    Dist2glob.redistribute(C, C_test)
    Dist2glob.redistribute(Sinv, Sinv_test)

    if rank == 0:
        ## diag_ex_err = abs(W - W0).max()
        diag_dc_err = abs(W_dc - W0).max()
        ## diag_mr3_err = abs(W_mr3 - W0).max()
        ## general_diag_ex_err = abs(W_g - W0_g).max()
        general_diag_dc_err = abs(W_g_dc - W0_g).max()
        ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max()
        inverse_chol_err = abs(C_test - C0).max()

        tri2full(Sinv_test, 'L')
        inverse_err = abs(Sinv_test - S0_inv).max()
        ## print 'diagonalize ex err', diag_ex_err
        print('diagonalize dc err', diag_dc_err)
        ## print 'diagonalize mr3 err', diag_mr3_err
        ## print 'general diagonalize ex err', general_diag_ex_err
        print('general diagonalize dc err', general_diag_dc_err)
        ## print 'general diagonalize mr3 err', general_diag_mr3_err
        print('inverse chol err', inverse_chol_err)
        if dtype == complex:
            print('inverse err', inverse_err)
    else:
        ## diag_ex_err = 0.0
        diag_dc_err = 0.0
        ## diag_mr3_err = 0.0
        ## general_diag_ex_err = 0.0
        general_diag_dc_err = 0.0
        ## general_diag_mr3_err = 0.0
        inverse_chol_err = 0.0
        inverse_err = 0.0

    # We don't like exceptions on only one cpu
    ## diag_ex_err = world.sum(diag_ex_err)
    diag_dc_err = world.sum(diag_dc_err)
    ## diag_mr3_err = world.sum(diag_mr3_err)
    ## general_diag_ex_err = world.sum(general_diag_ex_err)
    general_diag_dc_err = world.sum(general_diag_dc_err)
    ## general_diag_mr3_err = world.sum(general_diag_mr3_err)
    inverse_chol_err = world.sum(inverse_chol_err)
    inverse_err = world.sum(inverse_err)
    ## assert diag_ex_err < tol
    assert diag_dc_err < tol
    ## assert diag_mr3_err < tol
    ## assert general_diag_ex_err < tol
    assert general_diag_dc_err < tol
    ## assert general_diag_mr3_err < tol
    assert inverse_chol_err < tol
    if dtype == complex:
        assert inverse_err < tol
Example #24
    def diagonalize_full_hamiltonian(self, ham, atoms, occupations, txt,
                                     nbands=None, scalapack=None, expert=False):
        assert self.dtype == complex

        if nbands is None:
            nbands = self.pd.ngmin // self.bd.comm.size * self.bd.comm.size
        else:
            assert nbands <= self.pd.ngmin

        if expert:
            iu = nbands
        else:
            iu = None

        self.bd = bd = BandDescriptor(nbands, self.bd.comm)

        p = functools.partial(print, file=txt)
        p('Diagonalizing full Hamiltonian ({0} lowest bands)'.format(nbands))
        p('Matrix size (min, max): {0}, {1}'.format(self.pd.ngmin,
                                                    self.pd.ngmax))
        mem = 3 * self.pd.ngmax**2 * 16 / bd.comm.size / 1024**2
        p('Approximate memory usage per core: {0:.3f} MB'.format(mem))
        if bd.comm.size > 1:
            if isinstance(scalapack, (list, tuple)):
                nprow, npcol, b = scalapack
            else:
                nprow = int(round(bd.comm.size**0.5))
                while bd.comm.size % nprow != 0:
                    nprow -= 1
                npcol = bd.comm.size // nprow
                b = 64
            p('ScaLapack grid: {0}x{1},'.format(nprow, npcol),
              'block-size:', b)
            bg = BlacsGrid(bd.comm, bd.comm.size, 1)
            bg2 = BlacsGrid(bd.comm, nprow, npcol)
            scalapack = True
        else:
            nprow = npcol = 1
            scalapack = False

        self.pt.set_positions(atoms.get_scaled_positions())
        self.kpt_u[0].P_ani = None
        self.allocate_arrays_for_projections(self.pt.my_atom_indices)

        myslice = bd.get_slice()

        pb = ProgressBar(txt)
        nkpt = len(self.kpt_u)
        
        for u, kpt in enumerate(self.kpt_u):
            pb.update(u / nkpt)
            npw = len(self.pd.Q_qG[kpt.q])
            if scalapack:
                mynpw = -(-npw // bd.comm.size)
                md = BlacsDescriptor(bg, npw, npw, mynpw, npw)
                md2 = BlacsDescriptor(bg2, npw, npw, b, b)
            else:
                md = md2 = MatrixDescriptor(npw, npw)

            with self.timer('Build H and S'):
                H_GG, S_GG = self.hs(ham, kpt.q, kpt.s, md)

            if scalapack:
                r = Redistributor(bd.comm, md, md2)
                H_GG = r.redistribute(H_GG)
                S_GG = r.redistribute(S_GG)

            psit_nG = md2.empty(dtype=complex)
            eps_n = np.empty(npw)

            with self.timer('Diagonalize'):
                if not scalapack:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n, 
                                               iu=iu)
                else:
                    md2.general_diagonalize_dc(H_GG, S_GG, psit_nG, eps_n)
            del H_GG, S_GG

            kpt.eps_n = eps_n[myslice].copy()

            if scalapack:
                md3 = BlacsDescriptor(bg, npw, npw, bd.mynbands, npw)
                r = Redistributor(bd.comm, md2, md3)
                psit_nG = r.redistribute(psit_nG)

            kpt.psit_nG = psit_nG[:bd.mynbands].copy()
            del psit_nG

            with self.timer('Projections'):
                self.pt.integrate(kpt.psit_nG, kpt.P_ani, kpt.q)

            kpt.f_n = None

        pb.finish()
        
        occupations.calculate(self)
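
When no explicit `scalapack` tuple is passed, the code above picks the most square process grid whose dimensions multiply to the band-communicator size. As a standalone function the heuristic is simply (a sketch, not part of GPAW's API):

    def choose_grid(size):
        """Most-square (nprow, npcol) with nprow * npcol == size."""
        nprow = int(round(size ** 0.5))
        while size % nprow != 0:
            nprow -= 1
        return nprow, size // nprow

    assert choose_grid(12) == (3, 4)
    assert choose_grid(7) == (1, 7)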
Example #25
    def calculate_forces(self, hamiltonian, F_av):
        self.timer.start('LCAO forces')

        spos_ac = self.tci.atoms.get_scaled_positions() % 1.0
        ksl = self.ksl
        nao = ksl.nao
        mynao = ksl.mynao
        nq = len(self.kd.ibzk_qc)
        dtype = self.dtype
        tci = self.tci
        gd = self.gd
        bfs = self.basis_functions

        Mstart = ksl.Mstart
        Mstop = ksl.Mstop

        from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
        isblacs = isinstance(ksl, BlacsOrbitalLayouts)  # XXX

        if not isblacs:
            self.timer.start('TCI derivative')
            dThetadR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dTdR_qvMM = np.empty((nq, 3, mynao, nao), dtype)
            dPdR_aqvMi = {}
            for a in self.basis_functions.my_atom_indices:
                ni = self.setups[a].ni
                dPdR_aqvMi[a] = np.empty((nq, 3, nao, ni), dtype)
            tci.calculate_derivative(spos_ac, dThetadR_qvMM, dTdR_qvMM,
                                     dPdR_aqvMi)
            gd.comm.sum(dThetadR_qvMM)
            gd.comm.sum(dTdR_qvMM)
            self.timer.stop('TCI derivative')

            my_atom_indices = bfs.my_atom_indices
            atom_indices = bfs.atom_indices

            def _slices(indices):
                for a in indices:
                    M1 = bfs.M_a[a] - Mstart
                    M2 = M1 + self.setups[a].nao
                    if M2 > 0:
                        yield a, max(0, M1), M2

            def slices():
                return _slices(atom_indices)

            def my_slices():
                return _slices(my_atom_indices)

        #
        #         -----                    -----
        #          \    -1                  \    *
        # E      =  )  S     H    rho     =  )  c     eps  f  c
        #  mu nu   /    mu x  x z    z nu   /    n mu    n  n  n nu
        #         -----                    -----
        #          x z                       n
        #
        # We use the transpose of that matrix.  The first form is used
        # if rho is given, otherwise the coefficients are used.
        self.timer.start('Initial')

        rhoT_uMM = []
        ET_uMM = []

        if not isblacs:
            if self.kpt_u[0].rho_MM is None:
                self.timer.start('Get density matrix')
                for kpt in self.kpt_u:
                    rhoT_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n, kpt.C_nM)
                    rhoT_uMM.append(rhoT_MM)
                    ET_MM = ksl.get_transposed_density_matrix(
                        kpt.f_n * kpt.eps_n, kpt.C_nM)
                    ET_uMM.append(ET_MM)

                    if hasattr(kpt, 'c_on'):
                        # XXX does this work with BLACS/non-BLACS/etc.?
                        assert self.bd.comm.size == 1
                        d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
                                        dtype=kpt.C_nM.dtype)
                        for ne, c_n in zip(kpt.ne_o, kpt.c_on):
                            d_nn += ne * np.outer(c_n.conj(), c_n)
                        rhoT_MM += ksl.get_transposed_density_matrix_delta(
                            d_nn, kpt.C_nM)
                        ET_MM += ksl.get_transposed_density_matrix_delta(
                            d_nn * kpt.eps_n, kpt.C_nM)
                self.timer.stop('Get density matrix')
            else:
                rhoT_uMM = []
                ET_uMM = []
                for kpt in self.kpt_u:
                    H_MM = self.eigensolver.calculate_hamiltonian_matrix(
                        hamiltonian, self, kpt)
                    tri2full(H_MM)
                    S_MM = kpt.S_MM.copy()
                    tri2full(S_MM)
                    ET_MM = np.linalg.solve(S_MM,
                                            gemmdot(H_MM,
                                                    kpt.rho_MM)).T.copy()
                    del S_MM, H_MM
                    rhoT_MM = kpt.rho_MM.T.copy()
                    rhoT_uMM.append(rhoT_MM)
                    ET_uMM.append(ET_MM)
        self.timer.stop('Initial')

        if isblacs:  # XXX
            from gpaw.blacs import BlacsGrid, Redistributor

            def get_density_matrix(f_n, C_nM, redistributor):
                rho1_mm = ksl.calculate_blocked_density_matrix(f_n,
                                                               C_nM).conj()
                rho_mm = redistributor.redistribute(rho1_mm)
                return rho_mm

            pcutoff_a = [max(pt.get_cutoff() for pt in setup.pt_j)
                         for setup in self.setups]
            phicutoff_a = [max(phit.get_cutoff() for phit in setup.phit_j)
                           for setup in self.setups]

            # XXX should probably use bdsize x gdsize instead
            # That would be consistent with some existing grids
            grid = BlacsGrid(ksl.block_comm, self.gd.comm.size,
                             self.bd.comm.size)

            blocksize1 = -(-nao // grid.nprow)  # ceil(nao / grid.nprow)
            blocksize2 = -(-nao // grid.npcol)  # ceil(nao / grid.npcol)
            # XXX what are rows and columns actually?
            desc = grid.new_descriptor(nao, nao, blocksize1, blocksize2)

            rhoT_umm = []
            ET_umm = []
            redistributor = Redistributor(grid.comm, ksl.mmdescriptor, desc)
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                self.timer.start('Get density matrix')
                rhoT_mm = get_density_matrix(kpt.f_n, kpt.C_nM, redistributor)
                rhoT_umm.append(rhoT_mm)
                self.timer.stop('Get density matrix')

                self.timer.start('Potential')
                rhoT_mM = ksl.distribute_to_columns(rhoT_mm, desc)

                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_mM, kpt.q)
                del rhoT_mM
                self.timer.stop('Potential')

            self.timer.start('Get density matrix')
            for kpt in self.kpt_u:
                ET_mm = get_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM,
                                           redistributor)
                ET_umm.append(ET_mm)
            self.timer.stop('Get density matrix')

            M1start = blocksize1 * grid.myrow
            M2start = blocksize2 * grid.mycol

            M1stop = min(M1start + blocksize1, nao)
            M2stop = min(M2start + blocksize2, nao)

            m1max = M1stop - M1start
            m2max = M2stop - M2start

        if not isblacs:
            # Kinetic energy contribution
            #
            #           ----- d T
            #  a         \       mu nu
            # F += 2 Re   )   -------- rho
            #            /    d R         nu mu
            #           -----    mu nu
            #        mu in a; nu
            #
            Fkin_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dEdTrhoT_vMM = (dTdR_qvMM[kpt.q] *
                                rhoT_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Fkin_av[a, :] += \
                        2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
            del dEdTrhoT_vMM

            # Density matrix contribution due to basis overlap
            #
            #            ----- d Theta
            #  a          \           mu nu
            # F  += -2 Re  )   ------------  E
            #             /        d R        nu mu
            #            -----        mu nu
            #         mu in a; nu
            #
            Ftheta_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                dThetadRE_vMM = (dThetadR_qvMM[kpt.q] *
                                 ET_uMM[u][np.newaxis]).real
                for a, M1, M2 in my_slices():
                    Ftheta_av[a, :] += \
                        -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
            del dThetadRE_vMM

        if isblacs:
            from gpaw.lcao.overlap import TwoCenterIntegralCalculator
            self.timer.start('Prepare TCI loop')
            M_a = bfs.M_a

            Fkin2_av = np.zeros_like(F_av)
            Ftheta2_av = np.zeros_like(F_av)

            cell_cv = tci.atoms.cell
            spos_ac = tci.atoms.get_scaled_positions() % 1.0

            overlapcalc = TwoCenterIntegralCalculator(self.kd.ibzk_qc,
                                                      derivative=False)

            # XXX this is not parallel *AT ALL*.
            self.timer.start('Get neighbors')
            nl = tci.atompairs.pairs.neighbors
            r_and_offset_aao = get_r_and_offsets(nl, spos_ac, cell_cv)
            atompairs = sorted(r_and_offset_aao.keys())
            self.timer.stop('Get neighbors')

            T_expansions = tci.T_expansions
            Theta_expansions = tci.Theta_expansions
            P_expansions = tci.P_expansions
            nq = len(self.kd.ibzk_qc)

            dH_asp = hamiltonian.dH_asp

            self.timer.start('broadcast dH')
            alldH_asp = {}
            for a in range(len(self.setups)):
                gdrank = bfs.sphere_a[a].rank
                if gdrank == gd.rank:
                    dH_sp = dH_asp[a]
                else:
                    ni = self.setups[a].ni
                    dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
                gd.comm.broadcast(dH_sp, gdrank)
                # okay, now everyone gets copies of dH_sp
                alldH_asp[a] = dH_sp
            self.timer.stop('broadcast dH')

            # This will get sort of hairy.  We need to account for some
            # three-center overlaps, such as:
            #
            #         a1
            #      Phi   ~a3    a3  ~a3     a2     a2,a1
            #   < ----  |p  > dH   <p   |Phi  > rho
            #      dR
            #
            # To this end we will loop over all pairs of atoms (a1, a3),
            # and then a sub-loop over (a3, a2).
            from gpaw.lcao.overlap import DerivativeAtomicDisplacement

            class Displacement(DerivativeAtomicDisplacement):
                def __init__(self, a1, a2, R_c, offset):
                    phases = overlapcalc.phaseclass(overlapcalc.ibzk_qc,
                                                    offset)
                    DerivativeAtomicDisplacement.__init__(
                        self, None, a1, a2, R_c, offset, phases)

            # Cache of Displacement objects with evaluated spherical
            # harmonics.
            disp_aao = {}

            def get_displacements(a1, a2, maxdistance):
                # XXX the way maxdistance is handled can lead to bad
                # caching when different maxdistances are passed in
                # subsequent calls for the same pair of atoms
                disp_o = disp_aao.get((a1, a2))
                if disp_o is None:
                    disp_o = []
                    for R_c, offset in r_and_offset_aao[(a1, a2)]:
                        if np.linalg.norm(R_c) > maxdistance:
                            continue
                        disp = Displacement(a1, a2, R_c, offset)
                        disp_o.append(disp)
                    disp_aao[(a1, a2)] = disp_o
                return [disp for disp in disp_o if disp.r < maxdistance]

            self.timer.stop('Prepare TCI loop')
            self.timer.start('Not so complicated loop')

            for (a1, a2) in atompairs:
                if a1 >= a2:
                    # Actually this leads to bad load balance.
                    # We should take a1 > a2 or a1 < a2 equally many times.
                    # Maybe decide between the two depending on whether
                    # a2 % 2 == 0.
                    continue

                m1start = M_a[a1] - M1start
                m2start = M_a[a2] - M2start
                if m1start >= blocksize1 or m2start >= blocksize2:
                    continue  # (we have only one block per CPU)

                T_expansion = T_expansions.get(a1, a2)
                Theta_expansion = Theta_expansions.get(a1, a2)
                #P_expansion = P_expansions.get(a1, a2)
                nm1, nm2 = T_expansion.shape

                m1stop = min(m1start + nm1, m1max)
                m2stop = min(m2start + nm2, m2max)

                if m1stop <= 0 or m2stop <= 0:
                    continue

                m1start = max(m1start, 0)
                m2start = max(m2start, 0)
                J1start = max(0, M1start - M_a[a1])
                J2start = max(0, M2start - M_a[a2])
                J1stop = J1start + m1stop - m1start
                J2stop = J2start + m2stop - m2start

                dTdR_qvmm = T_expansion.zeros((nq, 3), dtype=dtype)
                dThetadR_qvmm = Theta_expansion.zeros((nq, 3), dtype=dtype)

                disp_o = get_displacements(a1, a2,
                                           phicutoff_a[a1] + phicutoff_a[a2])
                for disp in disp_o:
                    disp.evaluate_overlap(T_expansion, dTdR_qvmm)
                    disp.evaluate_overlap(Theta_expansion, dThetadR_qvmm)

                for u, kpt in enumerate(self.kpt_u):
                    rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                    ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                    Fkin_v = 2.0 * (
                        dTdR_qvmm[kpt.q][:, J1start:J1stop, J2start:J2stop] *
                        rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:J1stop,
                                                           J2start:J2stop] *
                                      ET_mm[np.newaxis]).real.sum(-1).sum(-1)
                    Fkin2_av[a1] += Fkin_v
                    Fkin2_av[a2] -= Fkin_v
                    Ftheta2_av[a1] -= Ftheta_v
                    Ftheta2_av[a2] += Ftheta_v

            Fkin_av = Fkin2_av
            Ftheta_av = Ftheta2_av
            self.timer.stop('Not so complicated loop')

            dHP_and_dSP_aauim = {}

            a2values = {}
            for (a2, a3) in atompairs:
                if a3 not in a2values:
                    a2values[a3] = []
                a2values[a3].append(a2)

            Fatom_av = np.zeros_like(F_av)
            Frho_av = np.zeros_like(F_av)
            self.timer.start('Complicated loop')
            for a1, a3 in atompairs:
                if a1 == a3:
                    # Functions reside on same atom, so their overlap
                    # does not change when atom is displaced
                    continue
                m1start = M_a[a1] - M1start
                if m1start >= blocksize1:
                    continue

                P_expansion = P_expansions.get(a1, a3)
                nm1 = P_expansion.shape[0]
                m1stop = min(m1start + nm1, m1max)
                if m1stop <= 0:
                    continue

                m1start = max(m1start, 0)
                J1start = max(0, M1start - M_a[a1])
                J1stop = J1start + m1stop - m1start

                disp_o = get_displacements(a1, a3,
                                           phicutoff_a[a1] + pcutoff_a[a3])
                if len(disp_o) == 0:
                    continue

                dPdR_qvmi = P_expansion.zeros((nq, 3), dtype=dtype)
                for disp in disp_o:
                    disp.evaluate_overlap(P_expansion, dPdR_qvmi)

                dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
                for a2 in a2values[a3]:
                    m2start = M_a[a2] - M2start
                    if m2start >= blocksize2:
                        continue

                    P_expansion2 = P_expansions.get(a2, a3)
                    nm2 = P_expansion2.shape[0]
                    m2stop = min(m2start + nm2, m2max)
                    if m2stop <= 0:
                        continue

                    disp_o = get_displacements(a2, a3,
                                               phicutoff_a[a2] + pcutoff_a[a3])
                    if len(disp_o) == 0:
                        continue

                    m2start = max(m2start, 0)
                    J2start = max(0, M2start - M_a[a2])
                    J2stop = J2start + m2stop - m2start

                    if (a2, a3) in dHP_and_dSP_aauim:
                        dHP_uim, dSP_uim = dHP_and_dSP_aauim[(a2, a3)]
                    else:
                        P_qmi = P_expansion2.zeros((nq, ), dtype=dtype)
                        for disp in disp_o:
                            disp.evaluate_direct(P_expansion2, P_qmi)
                        P_qmi = P_qmi[:, J2start:J2stop].copy()
                        dH_sp = alldH_asp[a3]
                        dS_ii = self.setups[a3].dO_ii

                        dHP_uim = []
                        dSP_uim = []
                        for u, kpt in enumerate(self.kpt_u):
                            dH_ii = unpack(dH_sp[kpt.s])
                            dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
                            # XXX only need nq of these
                            dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
                            dHP_uim.append(dHP_im)
                            dSP_uim.append(dSP_im)
                        dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim

                    for u, kpt in enumerate(self.kpt_u):
                        rhoT_mm = rhoT_umm[u][m1start:m1stop, m2start:m2stop]
                        ET_mm = ET_umm[u][m1start:m1stop, m2start:m2stop]
                        dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
                        dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])

                        Fatom_c = 2.0 * (dPdRdHP_vmm *
                                         rhoT_mm).real.sum(-1).sum(-1)
                        Frho_c = 2.0 * (dPdRdSP_vmm *
                                        ET_mm).real.sum(-1).sum(-1)
                        Fatom_av[a1] += Fatom_c
                        Fatom_av[a3] -= Fatom_c

                        Frho_av[a1] -= Frho_c
                        Frho_av[a3] += Frho_c

            self.timer.stop('Complicated loop')

        if not isblacs:
            # Potential contribution
            #
            #           -----      /  d Phi  (r)
            #  a         \        |        mu    ~
            # F += -2 Re  )       |   ---------- v (r)  Phi  (r) dr rho
            #            /        |     d R                nu          nu mu
            #           -----    /         a
            #        mu in a; nu
            #
            self.timer.start('Potential')
            Fpot_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                vt_G = hamiltonian.vt_sG[kpt.s]
                Fpot_av += bfs.calculate_force_contribution(
                    vt_G, rhoT_uMM[u], kpt.q)
            self.timer.stop('Potential')

            # Density matrix contribution from PAW correction
            #
            #           -----                        -----
            #  a         \      a                     \     b
            # F +=  2 Re  )    Z      E        - 2 Re  )   Z      E
            #            /      mu nu  nu mu          /     mu nu  nu mu
            #           -----                        -----
            #           mu nu                    b; mu in a; nu
            #
            # with
            #                  b*
            #         -----  dP
            #   b      \       i mu    b   b
            #  Z     =  )   -------- dS   P
            #   mu nu  /     dR        ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Paw correction')
            Frho_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                work_MM = np.zeros((mynao, nao), dtype)
                ZE_MM = None
                for b in my_atom_indices:
                    setup = self.setups[b]
                    dO_ii = np.asarray(setup.dO_ii, dtype)
                    dOP_iM = np.zeros((setup.ni, nao), dtype)
                    gemm(1.0, self.P_aqMi[b][kpt.q], dO_ii, 0.0, dOP_iM, 'c')
                    for v in range(3):
                        gemm(1.0, dOP_iM,
                             dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop], 0.0,
                             work_MM, 'n')
                        ZE_MM = (work_MM * ET_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ZE_MM[M1:M2].sum()
                            Frho_av[a, v] -= dE  # the "b; mu in a; nu" term
                            Frho_av[b, v] += dE  # the "mu nu" term
            del work_MM, ZE_MM
            self.timer.stop('Paw correction')

            # Atomic density contribution
            #            -----                         -----
            #  a          \     a                       \     b
            # F  += -2 Re  )   A      rho       + 2 Re   )   A      rho
            #             /     mu nu    nu mu          /     mu nu    nu mu
            #            -----                         -----
            #            mu nu                     b; mu in a; nu
            #
            #                  b*
            #         ----- d P
            #  b       \       i mu   b   b
            # A     =   )   ------- dH   P
            #  mu nu   /    d R       ij  j nu
            #         -----    b mu
            #           ij
            #
            self.timer.start('Atomic Hamiltonian force')
            Fatom_av = np.zeros_like(F_av)
            for u, kpt in enumerate(self.kpt_u):
                for b in my_atom_indices:
                    H_ii = np.asarray(unpack(hamiltonian.dH_asp[b][kpt.s]),
                                      dtype)
                    HP_iM = gemmdot(
                        H_ii,
                        np.ascontiguousarray(self.P_aqMi[b][kpt.q].T.conj()))
                    for v in range(3):
                        dPdR_Mi = dPdR_aqvMi[b][kpt.q][v][Mstart:Mstop]
                        ArhoT_MM = (gemmdot(dPdR_Mi, HP_iM) * rhoT_uMM[u]).real
                        for a, M1, M2 in slices():
                            dE = 2 * ArhoT_MM[M1:M2].sum()
                            Fatom_av[a, v] += dE  # the "b; mu in a; nu" term
                            Fatom_av[b, v] -= dE  # the "mu nu" term
            self.timer.stop('Atomic Hamiltonian force')

        F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
        self.timer.start('Wait for sum')
        ksl.orbital_comm.sum(F_av)
        if self.bd.comm.rank == 0:
            self.kd.comm.sum(F_av, 0)
        self.timer.stop('Wait for sum')
        self.timer.stop('LCAO forces')
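
In the BLACS branch of calculate_forces each rank holds exactly one block of the nao x nao matrices; block sizes are rounded up with the -(-a // b) ceiling-division idiom and the local index windows are clipped to nao. A serial toy sketch of that index arithmetic (no MPI, illustrative sizes):

nao, nprow, npcol = 10, 2, 3           # illustrative sizes
blocksize1 = -(-nao // nprow)          # ceil(nao / nprow)
blocksize2 = -(-nao // npcol)          # ceil(nao / npcol)
for myrow in range(nprow):
    for mycol in range(npcol):
        M1start = blocksize1 * myrow
        M2start = blocksize2 * mycol
        M1stop = min(M1start + blocksize1, nao)
        M2stop = min(M2start + blocksize2, nao)
        print((myrow, mycol), (M1start, M1stop), (M2start, M2stop))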
Example #26
    def __init__(self, sl_lrtddft, nrows, lr_comms):
        self.mprocs, self.nprocs, self.block_size = tuple(sl_lrtddft)

        self.lr_comms = lr_comms

        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        #
        # -----------------------------------------------------------------
        # matrix

        # original grid, ie, how matrix is stored
        self.orig_matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                          self.lr_comms.dd_comm.size,
                                          self.lr_comms.eh_comm.size)

        # solve grid
        self.solve_matrix_grid = BlacsGrid(self.lr_comms.parent_comm,
                                           self.mprocs, self.nprocs)

        # M = rows, N = cols
        M = nrows * 4
        N = nrows * 4
        mb = 4
        nb = 4
        self.orig_matrix_descr = self.orig_matrix_grid.new_descriptor(
            N, M, nb, mb)

        bs = self.block_size
        self.solve_matrix_descr = self.solve_matrix_grid.new_descriptor(
            N, M, bs, bs)

        self.matrix_in_redist = Redistributor(self.lr_comms.parent_comm,
                                              self.orig_matrix_descr,
                                              self.solve_matrix_descr)

        # -----------------------------------------------------------------
        # vector

        # original grid, ie, how vector is stored
        self.orig_vector_grid = BlacsGrid(
            self.lr_comms.parent_comm, 1,
            (self.lr_comms.dd_comm.size * self.lr_comms.eh_comm.size))

        # solve grid
        # self.solve_vector_grid = BlacsGrid(self.lr_comms.parent_comm,
        #                                    self.mprocs, self.nprocs)

        # M = rows, N = cols
        M = nrows * 4
        Nrhs = 1
        mb = 4
        nb = 1
        self.orig_vector_descr = self.orig_vector_grid.new_descriptor(
            Nrhs, M, nb, mb)

        bs = self.block_size
        self.solve_vector_descr = self.solve_matrix_grid.new_descriptor(
            Nrhs, M, 1, bs)

        self.vector_in_redist = Redistributor(self.lr_comms.parent_comm,
                                              self.orig_vector_descr,
                                              self.solve_vector_descr)

        self.vector_out_redist = Redistributor(self.lr_comms.parent_comm,
                                               self.solve_vector_descr,
                                               self.orig_vector_descr)
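
Note the reversed (N, M) argument order in the descriptors above: ScaLAPACK is Fortran column-major, so a C-ordered NumPy array appears to it as its transpose, and a length-M vector travels as a 1 x M matrix with a single right-hand side. A minimal sketch of such a vector descriptor on a flat process grid (illustrative sizes, not from the source):

from gpaw.mpi import world
from gpaw.blacs import BlacsGrid

M = 32                                   # vector length (illustrative)
vgrid = BlacsGrid(world, 1, world.size)  # flat 1 x P process grid
vdesc = vgrid.new_descriptor(1, M, 1, -(-M // world.size))
x_m = vdesc.zeros(dtype=float)           # this rank's slice of the vector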
Example #27
    def __init__(self, sl_lrtddft, nkq, dd_comm, eh_comm):
        mcpus, ncpus, blocksize = tuple(sl_lrtddft)
        self.world = eh_comm.parent
        self.dd_comm = dd_comm
        if self.world is None:
            self.world = self.dd_comm

        # All ranks within the domain communicator hold the omega matrix;
        # construct the new communicator only on the domain masters.
        eh_ranks = np.arange(eh_comm.size) * dd_comm.size
        self.eh_comm2 = self.world.new_communicator(eh_ranks)

        self.eh_grid = BlacsGrid(self.eh_comm2, eh_comm.size, 1)
        self.eh_descr = self.eh_grid.new_descriptor(nkq, nkq, 1, nkq)
        self.diag_grid = BlacsGrid(self.world, mcpus, ncpus)
        self.diag_descr = self.diag_grid.new_descriptor(
            nkq, nkq, blocksize, blocksize)

        self.redistributor_in = Redistributor(self.world, self.eh_descr,
                                              self.diag_descr)
        self.redistributor_out = Redistributor(self.world, self.diag_descr,
                                               self.eh_descr)
        """
        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        # -----------------------------------------------------------------
        # M = rows, N = cols
        M = nkq*4; N = nkq*4; mb = nkq*4; nb = 4; Nrhs = 1
        # Matrix, mp=1, np=eh_comm.size
        self.eh_grid2a = BlacsGrid(self.eh_comm2, eh_comm.size, 1)
        # Vector, mp=eh_comm.size, np=1
        self.eh_grid2b = BlacsGrid(self.eh_comm2, 1, eh_comm.size)
        self.eh_descr2a = self.eh_grid2a.new_descriptor(N,    M,  nb, mb)
        self.eh_descr2b = self.eh_grid2b.new_descriptor(Nrhs, N,   1, nb)

        self.solve_descr2a =self.diag_grid.new_descriptor(N, M,
                                                          blocksize, blocksize)
        self.solve_descr2b =self.diag_grid.new_descriptor(Nrhs, N,
                                                          1, blocksize)

        self.redist_solve_in_2a = Redistributor(self.world,
                                                self.eh_descr2a,
                                                self.solve_descr2a)
        self.redist_solve_in_2b = Redistributor(self.world,
                                                self.eh_descr2b,
                                                self.solve_descr2b)

        self.redist_solve_out_2a = Redistributor(self.world,
                                                 self.solve_descr2a,
                                                 self.eh_descr2a)
        self.redist_solve_out_2b = Redistributor(self.world,
                                                 self.solve_descr2b,
                                                 self.eh_descr2b)
        """

        # -----------------------------------------------------------------
        # for SCALAPACK we need TRANSPOSED MATRIX (and vector)
        # -----------------------------------------------------------------
        # M = rows, N = cols
        M = nkq * 4
        N = nkq * 4
        mb = 4
        nb = 4
        Nrhs = 1
        # Matrix, mp=1, np=eh_comm.size
        self.eh_grid2a = BlacsGrid(self.world, dd_comm.size, eh_comm.size)
        # Vector, mp=eh_comm.size, np=1
        self.eh_grid2b = BlacsGrid(self.world, 1, dd_comm.size * eh_comm.size)
        self.eh_descr2a = self.eh_grid2a.new_descriptor(N, M, nb, mb)
        self.eh_descr2b = self.eh_grid2b.new_descriptor(Nrhs, N, Nrhs, nb)
        self.solve_descr2a = self.diag_grid.new_descriptor(
            N, M, blocksize, blocksize)
        self.solve_descr2b = self.diag_grid.new_descriptor(
            Nrhs, N, Nrhs, blocksize)

        self.redist_solve_in_2a = Redistributor(self.world, self.eh_descr2a,
                                                self.solve_descr2a)
        self.redist_solve_in_2b = Redistributor(self.world, self.eh_descr2b,
                                                self.solve_descr2b)

        self.redist_solve_out_2a = Redistributor(self.world,
                                                 self.solve_descr2a,
                                                 self.eh_descr2a)
        self.redist_solve_out_2b = Redistributor(self.world,
                                                 self.solve_descr2b,
                                                 self.eh_descr2b)
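
The eh_comm2 construction above makes a communicator containing only the domain masters, one rank per dd_comm group. A standalone sketch of the same trick (dd_size is an illustrative value; assumes world.size is divisible by it):

import numpy as np
from gpaw.mpi import world

dd_size = 2                                    # ranks per domain (illustrative)
eh_ranks = np.arange(world.size // dd_size) * dd_size
eh_comm2 = world.new_communicator(eh_ranks)    # None on non-member ranks
if eh_comm2 is not None:
    print('rank %d is a domain master' % world.rank)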
Example #28
    def calculate_blocked_density_matrix(self, f_n, C_nM):
        nbands = self.bd.nbands
        nao = self.nao
        dtype = C_nM.dtype

        self.nMdescriptor.checkassert(C_nM)
        if self.gd.rank == 0:
            Cf_nM = (C_nM * f_n[:, None])
        else:
            C_nM = self.nM_unique_descriptor.zeros(dtype=dtype)
            Cf_nM = self.nM_unique_descriptor.zeros(dtype=dtype)

        r = Redistributor(self.block_comm, self.nM_unique_descriptor,
                          self.mmdescriptor)

        Cf_mm = self.mmdescriptor.zeros(dtype=dtype)
        r.redistribute(Cf_nM, Cf_mm, nbands, nao)
        del Cf_nM

        C_mm = self.mmdescriptor.zeros(dtype=dtype)
        r.redistribute(C_nM, C_mm, nbands, nao)
        # No use deleting C_nM as it is the caller's input array.

        rho_mm = self.mmdescriptor.zeros(dtype=dtype)

        if 1:  # if self.libelpa is None:
            pblas_simple_gemm(self.mmdescriptor,
                              self.mmdescriptor,
                              self.mmdescriptor,
                              Cf_mm,
                              C_mm,
                              rho_mm,
                              transa='C')
        else:
            # elpa_hermitian_multiply was not faster than the ordinary
            # multiplication in the test.  The way we have things distributed,
            # we need to transpose things at the moment.
            #
            # Rather than enabling this, we should store the coefficients
            # in an appropriate 2D block cyclic format (c_nm) and not the
            # current C_nM format.  This makes it possible to avoid
            # redistributing the coefficients at all.  But we don't have time
            # to implement this at the moment.
            mul = self.libelpa.hermitian_multiply
            desc = self.mmdescriptor
            from gpaw.utilities.scalapack import pblas_tran

            def T(array):
                tmp = array.copy()
                pblas_tran(alpha=1.0,
                           a_MN=tmp,
                           beta=0.0,
                           c_NM=array,
                           desca=desc,
                           descc=desc)

            T(C_mm)
            T(Cf_mm)
            mul(C_mm, Cf_mm, rho_mm, desc, desc, desc, uplo_a='X', uplo_c='X')

        return rho_mm
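
The pblas_simple_gemm call with transa='C' forms rho = (f C)^dagger C, i.e. the Hermitian matrix C^dagger f C distributed in 2D blocks. A serial NumPy analogue (random data, illustrative sizes) that the parallel product should reproduce block by block:

import numpy as np

nbands, nao = 4, 6                     # illustrative sizes
rng = np.random.default_rng(0)
C_nM = (rng.standard_normal((nbands, nao)) +
        1j * rng.standard_normal((nbands, nao)))
f_n = rng.random(nbands)               # occupation numbers
Cf_nM = C_nM * f_n[:, None]
rho_MM = np.dot(Cf_nM.conj().T, C_nM)  # (f C)^dagger C
assert np.allclose(rho_MM, rho_MM.conj().T)   # Hermitian since f is real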
Example #29
    def tddft_init(self):
        if self.tddft_initialized:
            return
        self.blacs = self.wfs.ksl.using_blacs
        if self.blacs:
            self.ksl = ksl = self.wfs.ksl
            nao = ksl.nao
            nbands = ksl.bd.nbands
            mynbands = ksl.bd.mynbands
            blocksize = ksl.blocksize

            from gpaw.blacs import Redistributor
            if self.wfs.world.rank == 0:
                print('BLACS Parallelization')

            # Parallel grid descriptors
            grid = ksl.blockgrid
            assert grid.nprow * grid.npcol == self.wfs.ksl.block_comm.size
            # FOR DEBUG
            self.MM_descriptor = grid.new_descriptor(nao, nao, nao, nao)
            self.mm_block_descriptor = grid.new_descriptor(
                nao, nao, blocksize, blocksize)
            self.Cnm_block_descriptor = grid.new_descriptor(
                nbands, nao, blocksize, blocksize)
            # self.CnM_descriptor = ksl.blockgrid.new_descriptor(nbands,
            #     nao, mynbands, nao)
            self.mM_column_descriptor = ksl.single_column_grid.new_descriptor(
                nao, nao, ksl.naoblocksize, nao)
            self.CnM_unique_descriptor = ksl.single_column_grid.new_descriptor(
                nbands, nao, mynbands, nao)

            # Redistributors
            self.mm2MM = Redistributor(ksl.block_comm,
                                       self.mm_block_descriptor,
                                       self.MM_descriptor)  # XXX FOR DEBUG
            self.MM2mm = Redistributor(ksl.block_comm, self.MM_descriptor,
                                       self.mm_block_descriptor)  # FOR DEBUG
            self.Cnm2nM = Redistributor(ksl.block_comm,
                                        self.Cnm_block_descriptor,
                                        self.CnM_unique_descriptor)
            self.CnM2nm = Redistributor(ksl.block_comm,
                                        self.CnM_unique_descriptor,
                                        self.Cnm_block_descriptor)
            self.mM2mm = Redistributor(ksl.block_comm,
                                       self.mM_column_descriptor,
                                       self.mm_block_descriptor)

            for kpt in self.wfs.kpt_u:
                scalapack_zero(self.mm_block_descriptor, kpt.S_MM, 'U')
                scalapack_zero(self.mm_block_descriptor, kpt.T_MM, 'U')

            # XXX to propagator class
            if self.propagator == 'taylor' and self.blacs:
                # cholS_mm = self.mm_block_descriptor.empty(dtype=complex)
                for kpt in self.wfs.kpt_u:
                    kpt.invS_MM = kpt.S_MM.copy()
                    scalapack_inverse(self.mm_block_descriptor, kpt.invS_MM,
                                      'L')
            if self.propagator == 'taylor' and not self.blacs:
                tmp = inv(self.wfs.kpt_u[0].S_MM)
                self.wfs.kpt_u[0].invS = tmp

        # Reset the density mixer
        self.density.set_mixer(DummyMixer())
        self.tddft_initialized = True
        for k, kpt in enumerate(self.wfs.kpt_u):
            kpt.C2_nM = kpt.C_nM.copy()
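
Because the BLACS-distributed S_MM and T_MM keep only one valid triangle, the upper triangle is zeroed and scalapack_inverse is told uplo='L'. A serial sketch of that storage convention (small random overlap matrix; np.linalg.inv stands in for scalapack_inverse):

import numpy as np

n = 4                                        # illustrative size
rng = np.random.default_rng(1)
A = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
S_full = np.dot(A, A.conj().T) + n * np.eye(n)   # Hermitian positive definite
S_low = np.tril(S_full)                      # what the 'L' storage keeps
S_rebuilt = S_low + S_low.conj().T - np.diag(S_low.diagonal().real)
invS = np.linalg.inv(S_rebuilt)              # stand-in for scalapack_inverse
assert np.allclose(np.dot(invS, S_full), np.eye(n))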
Example #30
from gpaw.mpi import world
from gpaw.blacs import BlacsGrid, Redistributor

if world.size < 2:
    raise ValueError('Requires two or more processors')

grid = BlacsGrid(world, 2, world.size // 2)

desc = grid.new_descriptor(12, 8, 2, 3)

a = desc.zeros()
a[:] = world.rank

subdesc = grid.new_descriptor(7, 7, 2, 2)
b = subdesc.zeros()

r = Redistributor(grid.comm, desc, subdesc, uplo='G')

ia = 3
ja = 2
ib = 1
jb = 1
M = 4
N = 5

r.redistribute(a, b, M, N, ia, ja, ib, jb)

a0 = desc.collect_on_master(a)
b0 = subdesc.collect_on_master(b)
if world.rank == 0:
    print(a0)
    print(b0)
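
The redistribute call copies an M x N (here 4 x 5) submatrix of a, offset by (ia, ja), into b at offset (ib, jb); uplo='G' selects a general (full) submatrix rather than one triangle. Collecting both matrices on the master afterwards lets rank 0 inspect and compare the result.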