Пример #1
0
    def __init__(self, comm, maxvecs, nmo_tot, nmo_max, nkpts, kp_sym=False):
        """Split k-index and orbital-pair index ranges across the ranks of comm.

        Two regimes are handled:

        * ``comm.size <= nkpts``: the k-index range (``nkpts`` entries when
          ``kp_sym`` is true, otherwise the ``nkpts * nkpts`` flattened
          (k1, k2) pairs) is divided among the ranks with ``fair_share`` and
          every rank keeps the full orbital-pair range (``nproc_pk == 1``).
        * ``comm.size > nkpts``: ``comm.size`` must be a multiple of
          ``nkpts``. ``nproc_pk = comm.size // nkpts`` consecutive ranks share
          one k-point (``kp_sym``) or one row of the (k1, k2) matrix, and the
          ``nmo_max * nmo_max`` orbital pairs are divided among those ranks.

        ``fair_share`` is defined elsewhere in the file; it is used here as
        returning a ``(start, end)`` pair for share ``i`` out of ``n``.

        Parameters
        ----------
        comm :
            Communicator; only ``comm.rank`` and ``comm.size`` are read.
        maxvecs : int
            Scaled by ``nmo_tot`` and stored as ``self.maxvecs``.
        nmo_tot : int
            Scale factor applied to ``maxvecs``.
        nmo_max : int
            The orbital-pair index runs over ``nmo_max * nmo_max`` entries.
        nkpts : int
            Number of k-points.
        kp_sym : bool, optional
            Partition single k-points instead of (k1, k2) pairs.

        Notes
        -----
        Sets ``maxvecs``, ``rank``, ``size``, ``nproc_pk``, ``kk0``, ``kkN``,
        ``nkk``, ``ij0``, ``ijN`` and ``nij``. ``kkbounds`` exists only when
        ``comm.size <= nkpts``; ``mykpt`` only when ``comm.size > nkpts`` and
        ``kp_sym`` is false; ``n2k1``/``n2k2`` only when ``kp_sym`` is false.

        Raises
        ------
        SystemExit
            With status 1 when ``comm.size > nkpts`` and ``comm.size`` is not
            a multiple of ``nkpts``.
        """
        # to do:
        # keep usage of n2k1,n2k2 but assign entire rows of the (k1,k2) matrix
        # also add extra partitioning of i*j matrix, along nproc_pk cores
        # algorithm is the same
        self.maxvecs = maxvecs * nmo_tot
        self.rank = comm.rank
        self.size = comm.size

        if comm.size <= nkpts:
            work = nkpts if kp_sym else nkpts * nkpts
            # kkbounds[i]:kkbounds[i + 1] is the k-index range owned by rank i.
            self.kkbounds = numpy.zeros(comm.size + 1, dtype=numpy.int32)
            for i in range(comm.size):
                self.kkbounds[i], self.kkbounds[i + 1] = fair_share(
                    work, comm.size, i)
            self.kk0, self.kkN = fair_share(work, comm.size, comm.rank)
            self.nproc_pk = 1
        else:
            # More ranks than k-points: the rank count has to be an exact
            # multiple of nkpts so each k-point gets the same number of ranks.
            if comm.size % nkpts != 0:
                print(" # If nproc > nkpts, nproc must be a multiple of the"
                      " number of k-points.")
                sys.exit(1)
            self.nproc_pk = comm.size // nkpts
            if kp_sym:
                self.kk0 = comm.rank // self.nproc_pk
                self.kkN = self.kk0 + 1
            else:
                self.mykpt = comm.rank // self.nproc_pk
                self.kk0 = self.mykpt * nkpts
                self.kkN = self.kk0 + nkpts

        self.ij0, self.ijN = fair_share(nmo_max * nmo_max, self.nproc_pk,
                                        comm.rank % self.nproc_pk)
        self.nij = self.ijN - self.ij0
        self.nkk = self.kkN - self.kk0
        if not kp_sym:
            # Decode each locally owned flattened pair index k = k1*nkpts + k2.
            kk = numpy.arange(self.kk0, self.kkN)
            self.n2k1 = (kk // nkpts).astype(numpy.int32)
            self.n2k2 = (kk % nkpts).astype(numpy.int32)
Пример #2
0
    def run(self, comm, Xaoik, Xaolj, part, kpts, nmo_pk, nmo_max, Qi, gmap):
        """Build Cholesky vectors by repeatedly pivoting on the largest residual.

        Each iteration broadcasts the pivot column from the rank that owns it,
        forms the new vector on every rank's local (k-pair, orbital-pair)
        slice, removes the projection on the previous vectors, updates the
        residual and selects the next pivot through an ``Allgather`` of the
        per-rank maxima followed by ``self.find_k3k4``. Iteration stops when
        the pivot value drops below ``self.gtol_chol`` or when
        ``part.maxvecs`` vectors have been produced.

        Parameters
        ----------
        comm :
            Communicator providing ``rank``, ``size``, ``Allgather`` and
            ``Bcast`` (presumably an mpi4py communicator; confirm with caller).
        Xaoik, Xaolj :
            Arrays indexed as ``[local k-pair, grid point, local orbital
            pair]``; ``Xaoik.shape[1]`` is taken as the number of grid points.
        part :
            Partition object; ``rank``, ``maxvecs``, ``nkk``, ``nij``, ``kk0``,
            ``kkN``, ``ij0``, ``ijN``, ``n2k1``, ``n2k2``, ``nproc_pk`` and
            (when ``comm.size <= nkpts``) ``kkbounds`` are read.
        kpts :
            Sequence of k-point vectors; its length is the number of k-points.
        nmo_pk :
            Number of orbitals for each k-point, indexed by k-point.
        nmo_max : int
            Orbital count used to size the ``done`` table and the
            ``nmo_max * nmo_max`` pair range.
        Qi :
            Table of candidate momentum shifts, rows ``0..26`` are searched.
        gmap :
            Integer index table; ``gmap[ip, ix]`` is the source grid index for
            target grid index ``ix`` under shift ``ip``.

        Returns
        -------
        numpy.ndarray
            ``cholvecs[:, :, :numv]``, the local block of the vectors produced.

        Raises
        ------
        MemoryError
            Re-raised after printing a message if the work arrays cannot be
            allocated.
        SystemExit
            With status 1 when no matching row of ``Qi`` is found or when a
            pivot is selected twice.
        """
        # Setup residual matrix.
        ngs = Xaoik.shape[1]
        nkpts = len(kpts)
        # time.clock() no longer exists on Python >= 3.8; use perf_counter().
        t0 = time.perf_counter()
        residual, k1max, k2max, i1max, i2max, maxv = self.generate_diagonal(
            Xaoik, Xaolj, part, nmo_pk)
        t1 = time.perf_counter()
        if part.rank == 0 and self.verbose:
            print(" # Time to generate diagonal (initial residual):"
                  " {:.2e}".format(t1 - t0))
            sys.stdout.flush()
        # Every rank publishes its local maximum into self.maxres_buff;
        # find_k3k4 (defined elsewhere) returns the selected pivot.
        comm.Allgather(
            numpy.array([k1max, k2max, i1max, i2max, maxv],
                        dtype=numpy.float64), self.maxres_buff)
        k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)
        try:
            done = numpy.zeros((nkpts, nkpts, nmo_max, nmo_max), numpy.int32)
            maxresidual = numpy.zeros(part.maxvecs, dtype=numpy.float64)
            cholvecs = numpy.zeros((part.nkk, part.nij, part.maxvecs),
                                   dtype=numpy.complex128)
            Xkl = numpy.zeros(ngs, dtype=numpy.complex128)
            Xkl0 = numpy.zeros(ngs + part.maxvecs, dtype=numpy.complex128)
            Vbuff = numpy.zeros(part.maxvecs, dtype=numpy.complex128)
        except MemoryError:
            print(" # Problems allocating memory for auxiliary structures for "
                  "Cholesky solver.")
            # Continuing would only fail later on an undefined array.
            raise
        done[k3, k4, i3, i4] = 1

        if comm.rank == 0:
            sys.stdout.flush()

        more = True  # for parallel
        numv = 0
        if comm.rank == 0 and self.verbose:
            header = [
                "iteration", "max_residual", "total_time", "time_k3k4",
                "time_comp_cholv", "time_buff"
            ]
            print(format_fixed_width_strings(header))
        while more:
            t0 = time.perf_counter()
            # stop condition
            if numv >= part.maxvecs:
                print(" Too many vectors needed to converge. Increase maximum "
                      "number of vectors.")
                break
            # Flattened indices of the pivot: k-pair and orbital pair.
            kkmax = k3 * nkpts + k4
            i34 = i3 * nmo_pk[k4] + i4
            # Work out which rank owns the pivot column.
            if comm.size <= nkpts:
                ipr = bisect(part.kkbounds[1:comm.size + 1], kkmax)
                if part.kk0 <= kkmax < part.kkN:
                    assert comm.rank == ipr
            else:
                for i in range(part.nproc_pk):
                    _, ijN_ = fair_share(nmo_max * nmo_max, part.nproc_pk, i)
                    if i34 < ijN_:
                        ipr = k3 * part.nproc_pk + i
                        break

            # bcast Xaolj/CV[k3,k4,i34,0:numv]
            if comm.rank == ipr:
                assert part.ij0 <= i34 < part.ijN
                kloc = kkmax - part.kk0
                ijloc = i34 - part.ij0
                Xkl0[0:ngs] = Xaolj[kloc, 0:ngs, ijloc]
                Xkl0[ngs:ngs + numv] = cholvecs[kloc, ijloc, 0:numv]
            comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
            # The tail of the broadcast buffer is the pivot row of the
            # previous vectors (on the root it equals what was packed above).
            Vbuff[0:numv] = Xkl0[ngs:ngs + numv]

            # add new Cholesky vector
            # 1. evaluate new column (ik|imax,kmax)

            t1 = time.perf_counter()

            for k in range(part.nkk):
                k1 = part.n2k1[k]
                k2 = part.n2k2[k]
                if k3 != self.kconserv[k1, k2, k4]:
                    continue
                # Number of valid local orbital pairs for this k-pair. Without
                # this guard a negative value would be read as a slice
                # counted from the end of the axis.
                n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0
                if n_ <= 0:
                    continue
                q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4]
                if numpy.sum(abs(q1)) > 1e-9:
                    # Non-zero momentum mismatch: locate it among the 27 rows
                    # of Qi and remap the grid index accordingly.
                    ip = -1
                    for ii in range(27):
                        if numpy.sum(
                                numpy.linalg.norm(q1 - Qi[ii, :])) < 1e-12:
                            ip = ii
                            break
                    if ip < 0:
                        print(" # Could not find Q: {} {} ".format(q1, Qi))
                        sys.exit(1)
                    Xkl[0:ngs] = Xkl0[gmap[ip, 0:ngs]]
                else:
                    Xkl[0:ngs] = Xkl0[0:ngs]
                cholvecs[k, 0:n_,
                         numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj())

            t2 = time.perf_counter()

            # 2. subtract projection along previous components
            cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv],
                                              Vbuff[0:numv].conj())
            cholvecs[:, :, numv] /= math.sqrt(vmax)

            # update residual
            residual -= (cholvecs[:, :, numv] *
                         cholvecs[:, :, numv].conj()).real

            # Local search for the largest remaining residual entry; entries
            # past nmo_pk[k1] * nmo_pk[k2] are padding and are not scanned.
            maxv = 0
            for k in range(part.nkk):
                k1 = part.n2k1[k]
                k2 = part.n2k2[k]
                for ij in range(part.nij):
                    if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]:
                        break
                    if abs(residual[k, ij]) > maxv:
                        maxv = abs(residual[k, ij])
                        k1max = k1
                        k2max = k2
                        i1max = (ij + part.ij0) // nmo_pk[k2]
                        i2max = (ij + part.ij0) % nmo_pk[k2]

            t3 = time.perf_counter()

            # Share the per-rank maxima and select the next pivot.
            comm.Allgather(
                numpy.array([k1max, k2max, i1max, i2max, maxv],
                            dtype=numpy.float64), self.maxres_buff)
            k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)

            t4 = time.perf_counter()

            # only root keeps track of residual and I/O
            if part.rank == 0:

                maxresidual[numv] = abs(vmax)

                # print and evaluate stop condition
                output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0]
                if self.verbose:
                    print("{:17d} ".format(numv) +
                          format_fixed_width_floats(output))

                if numv % 100 == 0:
                    sys.stdout.flush()

            numv += 1
            if vmax < self.gtol_chol:
                more = False

            # Selecting the same pivot twice while not converged is an error.
            if (done[k3, k4, i3, i4] > 0) and more:
                print("Error in Cholesky decomposition. "
                      "done[imax,kmax]>0.", k3, k4, i3, i4, vmax)
                sys.exit(1)
            done[k3, k4, i3, i4] = 1

        return cholvecs[:, :, :numv]
Пример #3
0
    def run(self, comm, X, h5file):
        """Factorize each momentum sector Q and write the vectors to ``h5file``.

        For every ``Q`` with ``Q <= self.kminus[Q]`` the method generates the
        orbital products, runs a pivoted Cholesky iteration on the local
        (k-point, orbital-pair) slice and stores the resulting vectors,
        scaled by ``1 / sqrt(nkpts)``, in dataset ``"L<Q>"`` of
        ``h5file.grp_v2``. The number of vectors per ``Q`` is written to
        ``"NCholPerKP"`` in ``h5file.grp`` at the end; skipped ``Q`` values
        keep whatever ``alloc_helper`` initialised the entry to.

        Parameters
        ----------
        comm :
            Communicator providing ``rank``, ``size``, ``Allgather``,
            ``Bcast`` and ``barrier`` (presumably mpi4py; confirm with
            caller).
        X :
            Passed through unchanged to ``self.generate_orbital_products``.
        h5file :
            Output wrapper; ``phdf``, ``grp_v2`` and ``grp`` are used. When
            ``phdf`` is false, ranks other than 0 write their local block
            together with an ``"Ldim<Q>"`` dataset describing its extents.

        Raises
        ------
        SystemExit
            With status 1 when no matching row of ``self.Qi`` is found or
            when a pivot is selected twice before convergence.
        """
        # Unpack for convenience.
        ngs = self.ngs
        nmo_max = self.nmo_max
        nkpts = self.nkpts
        part = self.part
        nmo_pk = self.nmo_pk
        QKToK2 = self.QKToK2
        kpts = self.kpts
        # Setup residual matrix.
        mem = 2 * 16 * nkpts * ngs * nmo_max**2 / 1024**3
        if self.verbose and comm.rank == 0:
            print(" # Approx total memory required for orbital products: "
                  "{:.2e} GB.".format(mem))
        mem_verbose = comm.rank == 0 and self.verbose > 1
        if mem_verbose:
            print(" # Approx memory per MPI task for auxiliary structures.")
        Xaoik = alloc_helper((part.nkk, ngs, part.nij),
                             dtype=numpy.complex128,
                             name='Xaoik',
                             verbose=mem_verbose)
        Xaolj = alloc_helper((part.nkk, ngs, part.nij),
                             dtype=numpy.complex128,
                             name='Xaolj',
                             verbose=mem_verbose)
        done = alloc_helper((nkpts, nmo_max, nmo_max),
                            numpy.int32,
                            name='done',
                            verbose=mem_verbose)
        maxresidual = alloc_helper((part.maxvecs, ),
                                   dtype=numpy.float64,
                                   name='maxresidual',
                                   verbose=mem_verbose)
        cholvecs = alloc_helper((part.nkk, part.nij, part.maxvecs),
                                dtype=numpy.complex128,
                                name='cholvecs',
                                verbose=mem_verbose)
        Xkl = alloc_helper((ngs, ),
                           dtype=numpy.complex128,
                           name='xkl',
                           verbose=mem_verbose)
        Xkl0 = alloc_helper((ngs + part.maxvecs, ),
                            dtype=numpy.complex128,
                            name='xkl0',
                            verbose=mem_verbose)
        Vbuff = alloc_helper((part.maxvecs, ),
                             dtype=numpy.complex128,
                             name='Vbuff',
                             verbose=mem_verbose)
        num_cholvecs = alloc_helper((nkpts, ),
                                    dtype=numpy.int32,
                                    name='num_cholvecs',
                                    verbose=mem_verbose)
        header = [
            "iteration", "max_residual", "total_time", "time_k3k4",
            "time_comp_cholv", "time_buff"
        ]

        for Q in range(nkpts):
            # Only Q <= kminus[Q] is factorized here.
            if Q > self.kminus[Q]:
                continue
            if comm.rank == 0 and self.verbose:
                print(
                    " # Calculating factorization for momentum: {}".format(Q))
                print(" # Generating orbital products")
                sys.stdout.flush()

            # Reset the work arrays that are reused across Q.
            maxresidual[:] = 0
            done[:, :, :] = 0
            cholvecs[:, :, :] = 0
            start = time.time()
            self.generate_orbital_products(Q, X, Xaoik, Xaolj)
            if self.verbose and comm.rank == 0:
                print(" # Time to generate orbital products: "
                      "{:13.8e}".format(time.time() - start))
            residual, k1max, k2max, i1max, i2max, maxv = (
                self.generate_diagonal(Q, Xaoik, Xaolj))

            # Every rank publishes its local maximum into self.maxres_buff;
            # find_k3k4 (defined elsewhere) returns the selected pivot.
            buff = numpy.array([k1max, k2max, i1max, i2max, maxv],
                               dtype=numpy.float64)
            comm.Allgather(buff, self.maxres_buff)
            k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)
            done[k3, i3, i4] = 1

            if comm.rank == 0:
                sys.stdout.flush()

            more = True  # for parallel
            numv = 0
            while more:

                # time.clock() no longer exists on Python >= 3.8.
                t0 = time.perf_counter()
                if comm.rank == 0 and self.verbose and numv == 0:
                    print(format_fixed_width_strings(header))
                # stop condition
                # NOTE(review): the work arrays are sized with part.maxvecs
                # while this limit reads self.maxvecs - confirm they agree.
                if numv >= self.maxvecs:
                    print(" Too many vectors needed to converge. "
                          "Increase maximum number of vectors.")
                    break

                # Flattened orbital-pair index of the pivot.
                i34 = i3 * nmo_pk[k4] + i4
                # Work out which rank owns the pivot column.
                if comm.size <= nkpts:
                    ipr = bisect(part.kkbounds[1:comm.size + 1], k3)
                    if part.kk0 <= k3 < part.kkN:
                        assert comm.rank == ipr
                else:
                    for i in range(part.nproc_pk):
                        _, ijN_ = fair_share(nmo_max * nmo_max,
                                             part.nproc_pk, i)
                        if i34 < ijN_:
                            ipr = k3 * part.nproc_pk + i
                            break

                if comm.rank == ipr:
                    assert i34 >= part.ij0
                    assert i34 < part.ijN
                    kloc = k3 - part.kk0
                    ijloc = i34 - part.ij0
                    Xkl0[0:ngs] = Xaolj[kloc, 0:ngs, ijloc]
                    Xkl0[ngs:ngs + numv] = cholvecs[kloc, ijloc, 0:numv]
                comm.Bcast(Xkl0[0:ngs + numv], root=ipr)
                # The tail of the broadcast buffer is the pivot row of the
                # previous vectors (on the root it equals what was packed).
                Vbuff[0:numv] = Xkl0[ngs:ngs + numv]

                # add new Cholesky vector
                # 1. evaluate new column (ik|imax,kmax)

                t1 = time.perf_counter()
                for k in range(part.nkk):
                    k1 = k + part.kk0
                    k2 = QKToK2[Q][k1]
                    # This rank's orbital-pair range lies entirely in the
                    # padding for this k-point: nothing to compute.
                    if part.ij0 > nmo_pk[k1] * nmo_pk[k2]:
                        continue
                    q1 = kpts[k2] - kpts[k1] + kpts[k3] - kpts[k4]
                    if numpy.sum(abs(q1)) > 1e-9:
                        # Non-zero momentum mismatch: locate it among the 27
                        # rows of Qi and remap the grid index accordingly.
                        ip = -1
                        for ii in range(27):
                            if numpy.sum(
                                    numpy.linalg.norm(q1 -
                                                      self.Qi[ii, :])) < 1e-12:
                                ip = ii
                                break
                        if ip < 0:
                            print("Could not find Q: {} {}".format(
                                q1, self.Qi))
                            sys.exit(1)
                        Xkl[0:ngs] = Xkl0[self.gmap[ip, 0:ngs]]
                    else:
                        Xkl[0:ngs] = Xkl0[0:ngs]
                    n_ = min(nmo_pk[k1] * nmo_pk[k2], part.ijN) - part.ij0
                    cholvecs[k, 0:n_,
                             numv] = numpy.dot(Xaoik[k, :, 0:n_].T, Xkl.conj())
                t2 = time.perf_counter()

                # 2. subtract projection along previous components
                cholvecs[:, :, numv] -= numpy.dot(cholvecs[:, :, 0:numv],
                                                  Vbuff[0:numv].conj())
                cholvecs[:, :, numv] /= math.sqrt(vmax)

                # update residual
                residual -= (cholvecs[:, :, numv] *
                             cholvecs[:, :, numv].conj()).real

                # Local search for the largest remaining residual entry;
                # -1 marks "no candidate on this rank".
                maxv = 0
                k1max = -1
                k2max = -1
                i1max = -1
                i2max = -1
                for k in range(part.nkk):
                    k1 = k + part.kk0
                    k2 = QKToK2[Q][k1]
                    for ij in range(part.nij):
                        if (ij + part.ij0) >= nmo_pk[k1] * nmo_pk[k2]:
                            break
                        if abs(residual[k, ij]) > maxv:
                            maxv = abs(residual[k, ij])
                            k1max = k1
                            k2max = k2
                            i1max = (ij + part.ij0) // nmo_pk[k2]
                            i2max = (ij + part.ij0) % nmo_pk[k2]

                t3 = time.perf_counter()

                # Share the per-rank maxima and select the next pivot.
                buff = numpy.array([k1max, k2max, i1max, i2max, maxv],
                                   dtype=numpy.float64)
                comm.Allgather(buff, self.maxres_buff)
                k3, k4, i3, i4, vmax = self.find_k3k4(comm.size)

                t4 = time.perf_counter()

                # only root keeps track of residual and I/O
                if comm.rank == 0:

                    maxresidual[numv] = abs(vmax)
                    # print and evaluate stop condition
                    output = [vmax, t4 - t0, t3 - t2, t2 - t1, t1 - t0]
                    if self.verbose:
                        print("{:17d} ".format(numv) +
                              format_fixed_width_floats(output))

                    if numv % 100 == 0:
                        sys.stdout.flush()

                numv += 1
                if vmax < self.gtol_chol:
                    more = False

                # Selecting the same pivot twice while not converged is an
                # error.
                if done[k3, i3, i4] > 0 and more:
                    print(
                        "Error in Cholesky decomposition. "
                        "done[imax,kmax]>0.", k3, k4, i3, i4, vmax)
                    sys.exit(1)
                done[k3, i3, i4] = 1

            comm.barrier()
            num_cholvecs[Q] = numv

            if h5file.phdf or comm.rank == 0:
                # Full-size dataset; this rank fills only the rows and
                # orbital-pair range it owns.
                LQ = h5file.grp_v2.create_dataset(
                    "L" + str(Q), (nkpts, nmo_max * nmo_max * numv, 2),
                    dtype=numpy.float64)
                # cholvecs[nkk,nij,maxvecs]
                for kk in range(part.nkk):
                    T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy())
                    T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0)
                    LQ[kk + part.kk0, part.ij0 * numv:part.ijN * numv, :] = T_
                    T_ = None
            else:
                # Local block only, plus the extents needed to place it.
                h5file.grp_v2.create_dataset(
                    "Ldim" + str(Q),
                    data=numpy.array(
                        [part.nkk, part.nij, part.kk0, part.ij0, part.ijN,
                         numv],
                        dtype=numpy.int32))
                LQ = h5file.grp_v2.create_dataset(
                    "L" + str(Q), (part.nkk, part.nij * numv, 2),
                    dtype=numpy.float64)
                # cholvecs[nkk,nij,maxvecs]
                for kk in range(part.nkk):
                    T_ = to_qmcpack_complex(cholvecs[kk, :, 0:numv].copy())
                    T_ = numpy.reshape(T_, (-1, 2)) / math.sqrt(nkpts * 1.0)
                    LQ[kk, :, :] = T_
                    T_ = None
            comm.barrier()

        h5file.grp.create_dataset("NCholPerKP", data=num_cholvecs)
        comm.barrier()