Example no. 1
def peek(target, key):

    # single lattice: export the sites selected by key as a raw memory view
    if type(target) == gpt.lattice:
        return gpt.mview(target[key])

    # list of lattices: flatten their virtual objects and export all in one call
    elif type(target) == list:

        pos, tidx, shape = map_key(target, key)
        v_obj = [y for x in target for y in x.v_obj]

        return gpt.mview(cgpt.lattice_export(v_obj, pos, tidx, shape))

    else:
        assert 0  # unsupported target type
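
A minimal usage sketch for peek (assumptions: gpt is importable and map_key is
in scope as above; the grid size and field type are illustrative choices):

import gpt

grid = gpt.grid([8, 8, 8, 16], gpt.double)
field = gpt.complex(grid)
field[:] = 0

coords = gpt.coordinates(field)  # coordinates owned by this rank
raw = peek(field, coords)        # gpt.mview over the exported bytes
print(len(raw), "bytes exported")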
Example no. 2
    def write_lattice(self, ctx, l):
        g = l.grid
        tag = (ctx + "\0").encode("utf-8")
        ntag = len(tag)
        nd = len(g.fdimensions)

        # create cartesian view for writing
        if "mpi" in self.params:
            mpi = self.params["mpi"]
        else:
            mpi = g.mpi
        cv0 = gpt.cartesian_view(-1, mpi, g.fdimensions, g.cb,
                                 l.checkerboard())

        # file positions
        pos = numpy.array([0] * cv0.ranks, dtype=numpy.uint64)

        # describe
        res = g.describe() + " " + cv0.describe() + " " + l.describe()

        # find tasks for my node
        views_for_node = self.views_for_node(cv0, g)

        # performance
        dt_distr, dt_crc, dt_write = 0.0, 0.0, 0.0
        #g.barrier()
        t0 = gpt.time()
        szGB = 0.0

        # need to write all views
        for xk, iview in enumerate(views_for_node):

            f, p = self.open_view(xk, iview, True, mpi, g.fdimensions, g.cb,
                                  l.checkerboard())

            # all nodes are needed to communicate
            dt_distr -= gpt.time()
            mv = gpt.mview(l[p])
            dt_distr += gpt.time()

            # write data
            if f is not None:
                # description and data
                dt_crc -= gpt.time()
                crc = gpt.crc32(mv)
                dt_crc += gpt.time()
                dt_write -= gpt.time()
                pos[iview] = f.tell()
                f.write(ntag.to_bytes(4, byteorder='little'))
                f.write(tag)
                f.write(crc.to_bytes(4, byteorder='little'))
                f.write(nd.to_bytes(4, byteorder='little'))
                for i in range(nd):
                    f.write(g.gdimensions[i].to_bytes(4, byteorder='little'))
                for i in range(nd):
                    f.write(g.mpi[i].to_bytes(4, byteorder='little'))
                f.write(len(mv).to_bytes(8, byteorder='little'))
                f.write(mv)
                f.flush()
                dt_write += gpt.time()
                szGB += len(mv) / 1024.**3.

        t1 = gpt.time()

        szGB = g.globalsum(szGB)
        if self.verbose and dt_crc != 0.0:
            gpt.message(
                "Wrote %g GB at %g GB/s (%g GB/s for distribution, %g GB/s for checksum, %g GB/s for writing, %d views per node)"
                % (szGB, szGB / (t1 - t0), szGB / dt_distr, szGB / dt_crc,
                   szGB / dt_write, len(views_for_node)))
        g.globalsum(pos)
        return res + " " + " ".join(["%d" % x for x in pos])
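
For reference, the record written above can be parsed back with plain Python;
this sketch assumes only the header layout that write_lattice emits
(little-endian sizes, NUL-terminated tag, crc32, dimensions, then the data):

import struct

def read_record(f):
    ntag = struct.unpack("<I", f.read(4))[0]
    tag = f.read(ntag).rstrip(b"\0").decode("utf-8")
    crc = struct.unpack("<I", f.read(4))[0]
    nd = struct.unpack("<I", f.read(4))[0]
    gdimensions = [struct.unpack("<I", f.read(4))[0] for _ in range(nd)]
    mpi = [struct.unpack("<I", f.read(4))[0] for _ in range(nd)]
    ndata = struct.unpack("<Q", f.read(8))[0]
    data = f.read(ndata)
    return tag, crc, gdimensions, mpi, data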
Example no. 3
import sys
import gpt as g

# grid
L = [16, 16, 16, 32]
grid_dp = g.grid(L, g.double)
grid_sp = g.grid(L, g.single)

# test fields
l_dp = g.random("test").cnormal(g.vcolor(grid_dp))
l_sp = g.convert(l_dp, g.single)

################################################################################
# Test mview
################################################################################
c = g.coordinates(l_dp)
x = l_dp[c]
mv = g.mview(x)
assert mv.itemsize == 1 and mv.shape[0] == len(mv)
# x is referenced by the local name, the mview's keep-alive reference,
# and getrefcount's own argument
assert sys.getrefcount(x) == 3
del mv
assert sys.getrefcount(x) == 2

################################################################################
# Test assignments
################################################################################
pos = l_dp.mview_coordinates()
lhs = g.lattice(l_dp)


def assign_copy():
    g.copy(lhs, l_dp)
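
A quick way to exercise the helper above (a sketch; the original test suite
presumably wraps it in its own benchmark loop):

t0 = g.time()
for _ in range(10):
    assign_copy()
t1 = g.time()
g.message("copy: %g s / iteration" % ((t1 - t0) / 10))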
Example no. 4
def save(filename, objs, params):

    # split data to save
    assert len(objs) == 3
    basis, cevec, ev = objs

    # verbosity
    verbose = gpt.default.is_verbose("io")
    if verbose:
        gpt.message(
            "Saving %d basis vectors, %d coarse-grid vectors, %d eigenvalues to %s"
            % (len(basis), len(cevec), len(ev), filename))

    # create directory
    if gpt.rank() == 0:
        os.makedirs(filename, exist_ok=True)

    # now sync since only root has created directory
    gpt.barrier()

    # write eigenvalues
    if gpt.rank() == 0:
        f = open("%s/eigen-values.txt" % filename, "wt")
        f.write("%d\n" % len(ev))
        for v in ev:
            f.write("%.15E\n" % v)
        f.close()

    # site checkerboard
    # only odd is used in this file format but
    # would be easy to generalize here
    site_cb = gpt.odd

    # grids
    assert len(basis) > 0
    assert len(cevec) > 0
    fgrid = basis[0].grid
    cgrid = cevec[0].grid

    # mpi layout
    if "mpi" in params:
        mpi = params["mpi"]
    else:
        mpi = fgrid.mpi
    assert mpi[0] == 1  # assert no mpi in 5th direction

    # params
    assert basis[0].checkerboard() == site_cb
    nd = 5
    assert len(fgrid.ldimensions) == nd
    fdimensions = fgrid.fdimensions
    ldimensions = [conformDiv(fdimensions[i], mpi[i]) for i in range(nd)]
    assert fgrid.precision == gpt.single
    s = ldimensions
    b = [
        conformDiv(fgrid.fdimensions[i], cgrid.fdimensions[i])
        for i in range(nd)
    ]
    nb = [conformDiv(s[i], b[i]) for i in range(nd)]
    neigen = len(cevec)
    nbasis = len(basis)
    if "nsingle" in params:
        nsingle = params["nsingle"]
        assert nsingle <= nbasis
    else:
        nsingle = nbasis
    nsingleCap = min([nsingle, nbasis])
    blocks = numpy.prod(nb)
    FP16_COEF_EXP_SHARE_FLOATS = 10
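    # (presumably: groups of this many fp16 coefficients share one exponent
    # float in the mixed-precision coarse format; must match the reader)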

    # write metadata
    if gpt.rank() == 0:
        fmeta = open("%s/metadata.txt" % filename, "wt")
        for i in range(nd):
            fmeta.write("s[%d] = %d\n" % (i, s[(i + 1) % nd]))
        for i in range(nd):
            fmeta.write("b[%d] = %d\n" % (i, b[(i + 1) % nd]))
        for i in range(nd):
            fmeta.write("nb[%d] = %d\n" % (i, nb[(i + 1) % nd]))
        fmeta.write("neig = %d\n" % neigen)
        fmeta.write("nkeep = %d\n" % nbasis)
        fmeta.write("nkeep_single = %d\n" % nsingle)
        fmeta.write("blocks = %d\n" % blocks)
        fmeta.write("FP16_COEF_EXP_SHARE_FLOATS = %d\n" %
                    FP16_COEF_EXP_SHARE_FLOATS)
        fmeta.flush()  # write crc32 later

    # create cartesian view on fine grid
    cv0 = gpt.cartesian_view(-1, mpi, fdimensions, fgrid.cb, site_cb)
    views = cv0.views_for_node(fgrid)
    crc32 = numpy.array([0] * cv0.ranks, dtype=numpy.uint64)
    # timing
    t0 = gpt.time()
    totalSizeGB = 0
    dt_fp16 = 1e-30
    dt_distr = 1e-30
    dt_munge = 1e-30
    dt_crc = 1e-30
    dt_fwrite = 1e-30

    # load all views
    if verbose:
        gpt.message("Saving %s with %d views per node" %
                    (filename, len(views)))

    for i, v in enumerate(views):
        cv = gpt.cartesian_view(v if v is not None else -1, mpi, fdimensions,
                                fgrid.cb, site_cb)
        cvc = gpt.cartesian_view(v if v is not None else -1, mpi,
                                 cgrid.fdimensions, gpt.full, gpt.none)
        pos_coarse = gpt.coordinates(cvc, "canonical")

        dn, fn = get_local_name(filename, cv)
        if fn is not None:
            os.makedirs(dn, exist_ok=True)

        # sizes
        slot_lsites = numpy.prod(cv.view_dimensions)
        assert slot_lsites % blocks == 0
        block_data_size_single = slot_lsites * 12 // 2 // blocks * 2 * 4
        block_data_size_fp16 = FP_16_SIZE(slot_lsites * 12 // 2 // blocks * 2,
                                          24)
        coarse_block_size_part_fp32 = 2 * (4 * nsingleCap)
        coarse_block_size_part_fp16 = 2 * (FP_16_SIZE(
            nbasis - nsingleCap, FP16_COEF_EXP_SHARE_FLOATS))
        coarse_vector_size = (coarse_block_size_part_fp32 +
                              coarse_block_size_part_fp16) * blocks
        # the written size is accumulated from the actual global writes below;
        # a predicted per-view size is not added here to avoid double counting

        # checksum
        crc32_comp = 0

        # file
        f = gpt.FILE(fn, "wb") if fn is not None else None

        # block positions
        pos = [
            cgpt.coordinates_from_block(cv.top, cv.bottom, b, nb,
                                        "canonicalOdd") for b in range(blocks)
        ]

        # group blocks: pairwise-merge adjacent blocks to reduce the number of
        # write calls (down to max_read_blocks, while the count stays even)
        read_blocks = blocks
        block_reduce = 1
        max_read_blocks = get_param(params, "max_read_blocks", 8)
        while read_blocks > max_read_blocks and read_blocks % 2 == 0:
            pos = [
                numpy.concatenate((pos[2 * i + 0], pos[2 * i + 1]))
                for i in range(read_blocks // 2)
            ]
            block_data_size_single *= 2
            block_data_size_fp16 *= 2
            read_blocks //= 2
            block_reduce *= 2

        # make read-only to enable caching
        for x in pos:
            x.setflags(write=0)

        # single-precision data
        data = memoryview(bytearray(block_data_size_single * nsingleCap))
        reduced_size = len(data) // block_reduce

        for b in range(read_blocks):
            fgrid.barrier()
            dt_distr -= gpt.time()
            data_munged = gpt.peek(
                basis[0:nsingleCap], pos[b]
            )  # TODO: can already munge here using new index interface
            dt_distr += gpt.time()

            if f is not None:
                dt_munge -= gpt.time()
                for l in range(block_reduce):
                    cgpt.munge_inner_outer(
                        data[reduced_size * l:reduced_size * (l + 1)],
                        data_munged[reduced_size * l:reduced_size * (l + 1)],
                        nsingleCap,
                        len(pos[b]) // block_reduce,
                    )
                dt_munge += gpt.time()
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()

            fgrid.barrier()
            dt_fwrite -= gpt.time()
            if f is not None:
                f.write(data)
                globalWriteGB = len(data) / 1024.0**3.0
            else:
                globalWriteGB = 0.0
            globalWriteGB = fgrid.globalsum(globalWriteGB)
            dt_fwrite += gpt.time()
            totalSizeGB += globalWriteGB

            if verbose:
                gpt.message(
                    "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fwrite,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                    ))

        # fp16 data
        if nbasis != nsingleCap:
            # allocate data buffer
            data_fp32 = memoryview(
                bytearray(block_data_size_single * (nbasis - nsingleCap)))
            data = memoryview(
                bytearray(block_data_size_fp16 * (nbasis - nsingleCap)))
            reduced_size = len(data_fp32) // block_reduce
            for b in range(read_blocks):
                fgrid.barrier()
                dt_distr -= gpt.time()
                data_munged = gpt.peek(basis[nsingleCap:nbasis], pos[b])
                dt_distr += gpt.time()

                if f is not None:
                    dt_munge -= gpt.time()
                    for l in range(block_reduce):
                        cgpt.munge_inner_outer(
                            data_fp32[reduced_size * l:reduced_size * (l + 1)],
                            data_munged[reduced_size * l:reduced_size *
                                        (l + 1)],
                            nbasis - nsingleCap,  # vectors held in this buffer
                            len(pos[b]) // block_reduce,
                        )
                    dt_munge += gpt.time()
                    dt_fp16 -= gpt.time()
                    cgpt.fp32_to_fp16(data, data_fp32, 24)
                    dt_fp16 += gpt.time()
                    dt_crc -= gpt.time()
                    crc32_comp = gpt.crc32(data, crc32_comp)
                    dt_crc += gpt.time()

                fgrid.barrier()
                dt_fwrite -= gpt.time()
                if f is not None:
                    f.write(data)
                    globalWriteGB = len(data) / 1024.0**3.0
                else:
                    globalWriteGB = 0.0
                globalWriteGB = fgrid.globalsum(globalWriteGB)
                dt_fwrite += gpt.time()
                totalSizeGB += globalWriteGB

                if verbose:
                    gpt.message(
                        "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s"
                        % (
                            totalSizeGB,
                            totalSizeGB / dt_fwrite,
                            totalSizeGB / dt_crc,
                            totalSizeGB / dt_munge,
                            totalSizeGB / dt_distr,
                            totalSizeGB / dt_fp16,
                        ))

        # coarse grid data
        data = memoryview(bytearray(coarse_vector_size))
        for j in range(neigen):
            fgrid.barrier()
            dt_distr -= gpt.time()
            data_fp32 = gpt.mview(cevec[j][pos_coarse])
            dt_distr += gpt.time()

            if f is not None:
                dt_fp16 -= gpt.time()
                cgpt.fp32_to_mixed_fp32fp16(
                    data,
                    data_fp32,
                    coarse_block_size_part_fp32,
                    coarse_block_size_part_fp16,
                    FP16_COEF_EXP_SHARE_FLOATS,
                )
                dt_fp16 += gpt.time()
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()

            fgrid.barrier()
            dt_fwrite -= gpt.time()
            if f is not None:
                f.write(data)
                globalWriteGB = len(data) / 1024.0**3.0
            else:
                globalWriteGB = 0.0
            globalWriteGB = fgrid.globalsum(globalWriteGB)
            dt_fwrite += gpt.time()
            totalSizeGB += globalWriteGB

            if verbose and j % max(neigen // 10, 1) == 0:
                gpt.message(
                    "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fwrite,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                        totalSizeGB / dt_fp16,
                    ))

        # save crc
        crc32[cv.rank] = crc32_comp

    # synchronize crc32
    fgrid.globalsum(crc32)

    # timing
    t1 = gpt.time()

    # write crc to metadata
    if gpt.rank() == 0:
        for i in range(len(crc32)):
            fmeta.write("crc32[%d] = %X\n" % (i, crc32[i]))
        fmeta.close()

    # verbosity
    if verbose:
        gpt.message("* save %g GB at %g GB/s" % (totalSizeGB, totalSizeGB /
                                                 (t1 - t0)))
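
The eigen-values.txt companion file written above (count on the first line,
then one value per line) can be read back with a small helper (a sketch):

def load_eigenvalues(filename):
    with open("%s/eigen-values.txt" % filename) as f:
        n = int(f.readline())
        return [float(f.readline()) for _ in range(n)]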
Example no. 5
    for y in range(4):
        for z in range(8):
            for t in range(8):
                src[x, y, z,
                    t] = g.vcomplex([x + t * 1j, y + t * 1j, z + t * 1j] * 10,
                                    30)

# now create a random partition of this lattice distributed over all nodes
# copy to make it writeable and lift the local_coordinate type
c = g.coordinates(grid).copy().view(np.ndarray)
random.seed(13)
for tr in range(10):
    shift = [random.randint(0, 8) for i in range(4)]
    for i in range(len(c)):
        for j in range(4):
            c[i][j] = (c[i][j] + shift[j]) % grid.gdimensions[j]
    data = src[c]  # test global uniform memory system
    mvrestore = g.mview(data)
    err2 = 0.0
    for i, pos in enumerate(c):
        for n in range(10):
            err2 += ((data[i][3 * n + 0].real - pos[0])**2.0 +
                     (data[i][3 * n + 1].real - pos[1])**2.0 +
                     (data[i][3 * n + 2].real - pos[2])**2.0)
    dst[c] = mvrestore
    err2 = grid.globalsum(err2)
    err1 = g.norm2(src - dst)
    g.message("Test shift", tr, "/ 10 :", shift, "difference norm/e2:", err1,
              err2)
    assert err1 == 0.0 and err2 == 0.0
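
The core roundtrip pattern this test exercises, in isolation (a sketch reusing
the objects defined above):

data = src[c]       # gather the sites at coordinates c into a numpy array
mv = g.mview(data)  # zero-copy byte view that keeps data alive
dst[c] = mv         # scatter the same bytes back to the same coordinates
assert g.norm2(src - dst) == 0.0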