Пример #1
0
def simple_mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-1, ) * 2,
                                          b=(1, ) * 2,
                                          n=(3, ) * 2)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    vol_discr = DGDiscretizationWithBoundaries(cl_ctx,
                                               local_mesh,
                                               order=5,
                                               mpi_communicator=comm)

    sym_x = sym.nodes(local_mesh.dim)
    myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
    myfunc = bind(vol_discr, myfunc_symb)(queue)

    sym_all_faces_func = sym.cse(
        sym.interp("vol", "all_faces")(sym.var("myfunc")))
    sym_int_faces_func = sym.cse(
        sym.interp("vol", "int_faces")(sym.var("myfunc")))
    sym_bdry_faces_func = sym.cse(
        sym.interp(sym.BTAG_ALL,
                   "all_faces")(sym.interp("vol",
                                           sym.BTAG_ALL)(sym.var("myfunc"))))

    bound_face_swap = bind(
        vol_discr,
        sym.interp("int_faces", "all_faces")(
            sym.OppositeInteriorFaceSwap("int_faces")(sym_int_faces_func)) -
        (sym_all_faces_func - sym_bdry_faces_func))

    # print(bound_face_swap)
    # 1/0

    hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
    import numpy.linalg as la
    error = la.norm(hopefully_zero.get())

    np.set_printoptions(threshold=100000000, suppress=True)
    print(hopefully_zero)
    print(error)

    assert error < 1e-14
Пример #2
0
def simple_mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    from meshmode.mesh import BTAG_ALL

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-1,)*2,
                                          b=(1,)*2,
                                          nelements_per_axis=(2,)*2)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=5,
            mpi_communicator=comm)

    x = thaw(dcoll.nodes(), actx)
    myfunc = actx.np.sin(np.dot(x, [2, 3]))

    from grudge.dof_desc import as_dofdesc

    dd_int = as_dofdesc("int_faces")
    dd_vol = as_dofdesc("vol")
    dd_af = as_dofdesc("all_faces")

    all_faces_func = op.project(dcoll, dd_vol, dd_af, myfunc)
    int_faces_func = op.project(dcoll, dd_vol, dd_int, myfunc)
    bdry_faces_func = op.project(dcoll, BTAG_ALL, dd_af,
                                 op.project(dcoll, dd_vol, BTAG_ALL, myfunc))

    hopefully_zero = (
        op.project(
            dcoll, "int_faces", "all_faces",
            dcoll.opposite_face_connection()(int_faces_func)
        )
        + sum(op.project(dcoll, tpair.dd, "all_faces", tpair.int)
              for tpair in op.cross_rank_trace_pairs(dcoll, myfunc))
    ) - (all_faces_func - bdry_faces_func)

    error = actx.to_numpy(flat_norm(hopefully_zero, ord=np.inf))

    print(__file__)
    with np.printoptions(threshold=100000000, suppress=True):
        logger.debug(hopefully_zero)
    logger.info("error: %.5e", error)

    assert error < 1e-14
def simple_mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    from meshmode.mesh import BTAG_ALL

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-1, ) * 2,
                                          b=(1, ) * 2,
                                          nelements_per_axis=(2, ) * 2)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    vol_discr = DiscretizationCollection(actx,
                                         local_mesh,
                                         order=5,
                                         mpi_communicator=comm)

    sym_x = sym.nodes(local_mesh.dim)
    myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
    myfunc = bind(vol_discr, myfunc_symb)(actx)

    sym_all_faces_func = sym.cse(
        sym.project("vol", "all_faces")(sym.var("myfunc")))
    sym_int_faces_func = sym.cse(
        sym.project("vol", "int_faces")(sym.var("myfunc")))
    sym_bdry_faces_func = sym.cse(
        sym.project(BTAG_ALL,
                    "all_faces")(sym.project("vol",
                                             BTAG_ALL)(sym.var("myfunc"))))

    bound_face_swap = bind(
        vol_discr,
        sym.project("int_faces", "all_faces")(
            sym.OppositeInteriorFaceSwap("int_faces")(sym_int_faces_func)) -
        (sym_all_faces_func - sym_bdry_faces_func))

    hopefully_zero = bound_face_swap(myfunc=myfunc)
    error = actx.np.linalg.norm(hopefully_zero, ord=np.inf)

    print(__file__)
    with np.printoptions(threshold=100000000, suppress=True):
        logger.debug(hopefully_zero)
    logger.info("error: %.5e", error)

    assert error < 1e-14
Пример #4
0
def generate_and_distribute_mesh(comm, generate_mesh):
    """Generate a mesh and distribute it among all ranks in *comm*.

    Generate the mesh with the user-supplied mesh generation function
    *generate_mesh*, partition the mesh, and distribute it to every
    rank in the provided MPI communicator *comm*.

    .. note::
        This is a collective routine and must be called by all MPI ranks.

    Parameters
    ----------
    comm:
        MPI communicator over which to partition the mesh
    generate_mesh:
        Callable of zero arguments returning a :class:`meshmode.mesh.Mesh`.
        Will only be called on one (undetermined) rank.

    Returns
    -------
    local_mesh : :class:`meshmode.mesh.Mesh`
        The local partition of the the mesh returned by *generate_mesh*.
    global_nelements : :class:`int`
        The number of elements in the serial mesh
    """
    from meshmode.distributed import (
        MPIMeshDistributor,
        get_partition_by_pymetis,
    )
    num_parts = comm.Get_size()
    mesh_dist = MPIMeshDistributor(comm)
    global_nelements = 0

    if mesh_dist.is_mananger_rank():

        mesh = generate_mesh()

        global_nelements = mesh.nelements

        part_per_element = get_partition_by_pymetis(mesh, num_parts)
        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    return local_mesh, global_nelements
Пример #5
0
def create_parallel_grid(comm, generate_grid):
    """Create and partition a grid.

    Create a grid with the user-supplied grid generation function
    *generate_grid*, partition the grid, and distribute it to every
    rank in the provided MPI communicator *comm*.

    Parameters
    ----------
    comm:
        MPI communicator over which to partition the grid
    generate_grid:
        Callable of zero arguments returning a :class:`meshmode.mesh.Mesh`.
        Will only be called on one (undetermined) rank.

    Returns
    -------
    local_mesh : :class:`meshmode.mesh.Mesh`
        The local partition of the the mesh returned by *generate_grid*.
    global_nelements : :class:`int`
        The number of elements in the serial grid
    """
    from meshmode.distributed import (
        MPIMeshDistributor,
        get_partition_by_pymetis,
    )
    num_parts = comm.Get_size()
    mesh_dist = MPIMeshDistributor(comm)
    global_nelements = 0

    if mesh_dist.is_mananger_rank():

        mesh = generate_grid()

        global_nelements = mesh.nelements

        part_per_element = get_partition_by_pymetis(mesh, num_parts)
        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    return local_mesh, global_nelements
Пример #6
0
def main():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue,
                                allocator=cl_tools.MemoryPool(
                                    cl_tools.ImmediateAllocator(queue)))

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    nel_1d = 16

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          n=(nel_1d, ) * dim,
                                          boundary_tag_to_face={
                                              "dirichlet": ["+x", "-x"],
                                              "neumann": ["+y", "-y"]
                                          })

        print("%d elements" % mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    order = 3

    discr = EagerDGDiscretization(actx,
                                  local_mesh,
                                  order=order,
                                  mpi_communicator=comm)

    if dim == 2:
        # no deep meaning here, just a fudge factor
        dt = 0.0025 / (nel_1d * order**2)
    else:
        raise ValueError("don't have a stable time step guesstimate")

    source_width = 0.2

    nodes = thaw(actx, discr.nodes())

    u = discr.zeros(actx)

    vis = make_visualizer(discr, order + 3 if dim == 2 else order)

    boundaries = {
        grudge_sym.DTAG_BOUNDARY("dirichlet"): DirichletDiffusionBoundary(0.),
        grudge_sym.DTAG_BOUNDARY("neumann"): NeumannDiffusionBoundary(0.)
    }

    def rhs(t, u):
        return (
            diffusion_operator(discr, alpha=1, boundaries=boundaries, u=u) +
            actx.np.exp(-np.dot(nodes, nodes) / source_width**2))

    rank = comm.Get_rank()

    t = 0
    t_final = 0.01
    istep = 0

    while True:
        if istep % 10 == 0:
            print(istep, t, discr.norm(u))
            vis.write_vtk_file(
                "fld-heat-source-mpi-%03d-%04d.vtu" % (rank, istep),
                [("u", u)])

        if t >= t_final:
            break

        u = rk4_step(u, t, dt, rhs)
        t += dt
        istep += 1
Пример #7
0
def main():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue,
                                allocator=cl_tools.MemoryPool(
                                    cl_tools.ImmediateAllocator(queue)))

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    nel_1d = 16

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          n=(nel_1d, ) * dim)

        print("%d elements" % mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    order = 3

    discr = EagerDGDiscretization(actx,
                                  local_mesh,
                                  order=order,
                                  mpi_communicator=comm)

    if dim == 2:
        # no deep meaning here, just a fudge factor
        dt = 0.75 / (nel_1d * order**2)
    elif dim == 3:
        # no deep meaning here, just a fudge factor
        dt = 0.45 / (nel_1d * order**2)
    else:
        raise ValueError("don't have a stable time step guesstimate")

    fields = flat_obj_array(bump(actx, discr),
                            [discr.zeros(actx) for i in range(discr.dim)])

    vis = make_visualizer(discr, order + 3 if dim == 2 else order)

    def rhs(t, w):
        return wave_operator(discr, c=1, w=w)

    rank = comm.Get_rank()

    t = 0
    t_final = 3
    istep = 0
    while t < t_final:
        fields = rk4_step(fields, t, dt, rhs)

        if istep % 10 == 0:
            print(istep, t, discr.norm(fields[0]))
            vis.write_vtk_file(
                "fld-wave-eager-mpi-%03d-%04d.vtu" % (rank, istep), [
                    ("u", fields[0]),
                    ("v", fields[1:]),
                ])

        t += dt
        istep += 1
Пример #8
0
def mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    i_local_rank = comm.Get_rank()
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    dt = 0.04
    order = 4

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          n=(16, ) * dim)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    vol_discr = DGDiscretizationWithBoundaries(cl_ctx,
                                               local_mesh,
                                               order=order,
                                               mpi_communicator=comm)

    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
    source_width = 0.05
    source_omega = 3

    sym_x = sym.nodes(local_mesh.dim)
    sym_source_center_dist = sym_x - source_center
    sym_t = sym.ScalarVariable("t")

    from grudge.models.wave import StrongWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE
    op = StrongWaveOperator(
        -0.1,
        vol_discr.dim,
        source_f=(
            sym.sin(source_omega * sym_t) *
            sym.exp(-np.dot(sym_source_center_dist, sym_source_center_dist) /
                    source_width**2)),
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind")

    from pytools.obj_array import join_fields
    fields = join_fields(
        vol_discr.zeros(queue),
        [vol_discr.zeros(queue) for i in range(vol_discr.dim)])

    # FIXME
    # dt = op.estimate_rk4_timestep(vol_discr, fields=fields)

    # FIXME: Should meshmode consider BTAG_PARTITION to be a boundary?
    #           Fails because: "found faces without boundary conditions"
    # op.check_bc_coverage(local_mesh)

    from pytools.log import LogManager, \
            add_general_quantities, \
            add_run_info, \
            IntervalTimer, EventCounter
    log_filename = None
    # NOTE: LogManager hangs when using a file on a shared directory.
    # log_filename = 'grudge_log.dat'
    logmgr = LogManager(log_filename, "w", comm)
    add_run_info(logmgr)
    add_general_quantities(logmgr)
    log_quantities =\
        {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
        "Time spent evaluating RankDataSwapAssign"),
        "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
        "Number of RankDataSwapAssign instructions evaluated"),
        "exec_timer": IntervalTimer("exec_timer",
        "Total time spent executing instructions"),
        "insn_eval_timer": IntervalTimer("insn_eval_timer",
        "Time spend evaluating instructions"),
        "future_eval_timer": IntervalTimer("future_eval_timer",
        "Time spent evaluating futures"),
        "busy_wait_timer": IntervalTimer("busy_wait_timer",
        "Time wasted doing busy wait")}
    for quantity in log_quantities.values():
        logmgr.add_quantity(quantity)

    # print(sym.pretty(op.sym_operator()))
    bound_op = bind(vol_discr, op.sym_operator())

    # print(bound_op)
    # 1/0

    def rhs(t, w):
        val, rhs.profile_data = bound_op(queue,
                                         profile_data=rhs.profile_data,
                                         log_quantities=log_quantities,
                                         t=t,
                                         w=w)
        return val

    rhs.profile_data = {}

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 4
    nsteps = int(final_t / dt)
    print("rank=%d dt=%g nsteps=%d" % (i_local_rank, dt, nsteps))

    # from grudge.shortcuts import make_visualizer
    # vis = make_visualizer(vol_discr, vis_order=order)

    step = 0

    norm = bind(vol_discr, sym.norm(2, sym.var("u")))

    from time import time
    t_last_step = time()

    logmgr.tick_before()
    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            print(step, event.t, norm(queue, u=event.state_component[0]),
                  time() - t_last_step)

            # if step % 10 == 0:
            #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
            #                        [("u", event.state_component[0]),
            #                         ("v", event.state_component[1:])])
            t_last_step = time()
            logmgr.tick_after()
            logmgr.tick_before()
    logmgr.tick_after()

    def print_profile_data(data):
        print("""execute() for rank %d:
            \tInstruction Evaluation: %f%%
            \tFuture Evaluation: %f%%
            \tBusy Wait: %f%%
            \tTotal: %f seconds""" %
              (i_local_rank, data['insn_eval_time'] / data['total_time'] * 100,
               data['future_eval_time'] / data['total_time'] * 100,
               data['busy_wait_time'] / data['total_time'] * 100,
               data['total_time']))

    print_profile_data(rhs.profile_data)
    logmgr.close()
    logger.debug("Rank %d exiting", i_local_rank)
Пример #9
0
def simple_wave_entrypoint(dim=2, num_elems=256, order=4, num_steps=30,
                           log_filename="grudge.dat"):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()
    n = int(num_elems ** (1./dim))

    from meshmode.distributed import MPIMeshDistributor
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
                                          b=(0.5,)*dim,
                                          n=(n,)*dim)

        from pymetis import part_graph
        _, p = part_graph(num_parts,
                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
        part_per_element = np.array(p)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
                                               mpi_communicator=comm)

    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
    source_width = 0.05
    source_omega = 3

    sym_x = sym.nodes(local_mesh.dim)
    sym_source_center_dist = sym_x - source_center
    sym_t = sym.ScalarVariable("t")

    from grudge.models.wave import StrongWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE
    op = StrongWaveOperator(-0.1, vol_discr.dim,
            source_f=(
                sym.sin(source_omega*sym_t)
                * sym.exp(
                    -np.dot(sym_source_center_dist, sym_source_center_dist)
                    / source_width**2)),
            dirichlet_tag=BTAG_NONE,
            neumann_tag=BTAG_NONE,
            radiation_tag=BTAG_ALL,
            flux_type="upwind")

    from pytools.obj_array import join_fields
    fields = join_fields(vol_discr.zeros(queue),
            [vol_discr.zeros(queue) for i in range(vol_discr.dim)])

    from logpyle import LogManager, \
            add_general_quantities, \
            add_run_info, \
            IntervalTimer, EventCounter
    # NOTE: LogManager hangs when using a file on a shared directory.
    logmgr = LogManager(log_filename, "w", comm)
    add_run_info(logmgr)
    add_general_quantities(logmgr)
    log_quantities =\
        {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
                        "Time spent evaluating RankDataSwapAssign"),
        "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
                        "Number of RankDataSwapAssign instructions evaluated"),
        "exec_timer": IntervalTimer("exec_timer",
                        "Total time spent executing instructions"),
        "insn_eval_timer": IntervalTimer("insn_eval_timer",
                        "Time spend evaluating instructions"),
        "future_eval_timer": IntervalTimer("future_eval_timer",
                        "Time spent evaluating futures"),
        "busy_wait_timer": IntervalTimer("busy_wait_timer",
                        "Time wasted doing busy wait")}
    for quantity in log_quantities.values():
        logmgr.add_quantity(quantity)

    bound_op = bind(vol_discr, op.sym_operator())

    def rhs(t, w):
        val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data,
                                                log_quantities=log_quantities,
                                                t=t, w=w)
        return val
    rhs.profile_data = {}

    dt = 0.04
    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    logmgr.tick_before()
    for event in dt_stepper.run(t_end=dt * num_steps):
        if isinstance(event, dt_stepper.StateComputed):
            logmgr.tick_after()
            logmgr.tick_before()
    logmgr.tick_after()

    def print_profile_data(data):
        print("""execute() for rank %d:
            \tInstruction Evaluation: %f%%
            \tFuture Evaluation: %f%%
            \tBusy Wait: %f%%
            \tTotal: %f seconds""" %
            (comm.Get_rank(),
             data['insn_eval_time'] / data['total_time'] * 100,
             data['future_eval_time'] / data['total_time'] * 100,
             data['busy_wait_time'] / data['total_time'] * 100,
             data['total_time']))

    print_profile_data(rhs.profile_data)
    logmgr.close()
Пример #10
0
def main(ctx_factory, dim=2, order=3, visualize=False, lazy=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    if lazy:
        actx = PytatoPyOpenCLArrayContext(queue)
    else:
        actx = PyOpenCLArrayContext(
            queue,
            allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
            force_device_scalars=True,
        )

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    nel_1d = 16

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          nelements_per_axis=(nel_1d, ) * dim)

        logger.info("%d elements", mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx,
                                     local_mesh,
                                     order=order,
                                     mpi_communicator=comm)

    fields = WaveState(u=bump(actx, dcoll),
                       v=make_obj_array(
                           [dcoll.zeros(actx) for i in range(dcoll.dim)]))

    c = 1
    dt = actx.to_numpy(0.45 * estimate_rk4_timestep(actx, dcoll, c))

    vis = make_visualizer(dcoll)

    def rhs(t, w):
        return wave_operator(dcoll, c=c, w=w)

    compiled_rhs = actx.compile(rhs)

    if comm.rank == 0:
        logger.info("dt = %g", dt)

    import time
    start = time.time()

    t = 0
    t_final = 3
    istep = 0
    while t < t_final:
        if lazy:
            fields = thaw(freeze(fields, actx), actx)

        fields = rk4_step(fields, t, dt, compiled_rhs)

        l2norm = actx.to_numpy(op.norm(dcoll, fields.u, 2))

        if istep % 10 == 0:
            stop = time.time()
            linfnorm = actx.to_numpy(op.norm(dcoll, fields.u, np.inf))
            nodalmax = actx.to_numpy(op.nodal_max(dcoll, "vol", fields.u))
            nodalmin = actx.to_numpy(op.nodal_min(dcoll, "vol", fields.u))
            if comm.rank == 0:
                logger.info(f"step: {istep} t: {t} "
                            f"L2: {l2norm} "
                            f"Linf: {linfnorm} "
                            f"sol max: {nodalmax} "
                            f"sol min: {nodalmin} "
                            f"wall: {stop-start} ")
            if visualize:
                vis.write_parallel_vtk_file(
                    comm, f"fld-wave-eager-mpi-{{rank:03d}}-{istep:04d}.vtu", [
                        ("u", fields.u),
                        ("v", fields.v),
                    ])
            start = stop

        t += dt
        istep += 1

        # NOTE: These are here to ensure the solution is bounded for the
        # time interval specified
        assert l2norm < 1
Пример #11
0
def mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    i_local_rank = comm.Get_rank()
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    order = 4

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
                                          b=(0.5,)*dim,
                                          nelements_per_axis=(16,)*dim)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

        del mesh
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=order,
                                     mpi_communicator=comm)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)]
        )
        return (
            np.sin(source_omega*t)
            * actx.np.exp(
                -np.dot(source_center_dist, source_center_dist)
                / source_width**2
            )
        )

    from grudge.models.wave import WeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = WeakWaveOperator(
        dcoll,
        0.1,
        source_f=source_f,
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind"
    )

    fields = flat_obj_array(
        dcoll.zeros(actx),
        [dcoll.zeros(actx) for i in range(dcoll.dim)]
    )

    dt = actx.to_numpy(
        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))

    wave_op.check_bc_coverage(local_mesh)

    from logpyle import LogManager, \
            add_general_quantities, \
            add_run_info
    log_filename = None
    # NOTE: LogManager hangs when using a file on a shared directory.
    # log_filename = "grudge_log.dat"
    logmgr = LogManager(log_filename, "w", comm)
    add_run_info(logmgr)
    add_general_quantities(logmgr)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 4
    nsteps = int(final_t/dt)
    logger.info("[%04d] dt %.5e nsteps %4d", i_local_rank, dt, nsteps)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    logmgr.tick_before()
    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            logger.info("[%04d] t = %.5e |u| = %.5e ellapsed %.5e",
                        step, event.t,
                        norm(u=event.state_component[0]),
                        time() - t_last_step)

            t_last_step = time()
            logmgr.tick_after()
            logmgr.tick_before()

    logmgr.tick_after()
    logmgr.close()
    logger.info("Rank %d exiting", i_local_rank)
Пример #12
0
def main(write_output=True, order=4):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        dims = 2
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dims,
                                          b=(0.5, ) * dims,
                                          n=(16, ) * dims)

        print("%d elements" % mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    discr = DGDiscretizationWithBoundaries(actx,
                                           local_mesh,
                                           order=order,
                                           mpi_communicator=comm)

    if local_mesh.dim == 2:
        dt = 0.04
    elif local_mesh.dim == 3:
        dt = 0.02

    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
    source_width = 0.05
    source_omega = 3

    sym_x = sym.nodes(local_mesh.dim)
    sym_source_center_dist = sym_x - source_center
    sym_t = sym.ScalarVariable("t")

    from grudge.models.wave import StrongWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE
    op = StrongWaveOperator(
        -0.1,
        discr.dim,
        source_f=(
            sym.sin(source_omega * sym_t) *
            sym.exp(-np.dot(sym_source_center_dist, sym_source_center_dist) /
                    source_width**2)),
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind")

    from pytools.obj_array import flat_obj_array
    fields = flat_obj_array(discr.zeros(actx),
                            [discr.zeros(actx) for i in range(discr.dim)])

    # FIXME
    #dt = op.estimate_rk4_timestep(discr, fields=fields)

    op.check_bc_coverage(local_mesh)

    # print(sym.pretty(op.sym_operator()))
    bound_op = bind(discr, op.sym_operator())

    def rhs(t, w):
        return bound_op(t=t, w=w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 10
    nsteps = int(final_t / dt)
    print("dt=%g nsteps=%d" % (dt, nsteps))

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(discr, vis_order=order)

    step = 0

    norm = bind(discr, sym.norm(2, sym.var("u")))

    from time import time
    t_last_step = time()

    rank = comm.Get_rank()

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1

            print(step, event.t, norm(u=event.state_component[0]),
                  time() - t_last_step)
            if step % 10 == 0:
                vis.write_vtk_file(
                    "fld-wave-min-mpi-%03d-%04d.vtu" % (
                        rank,
                        step,
                    ), [
                        ("u", event.state_component[0]),
                        ("v", event.state_component[1:]),
                    ])
            t_last_step = time()
Пример #13
0
def main(snapshot_pattern="wave-eager-{step:04d}-{rank:04d}.pkl",
         restart_step=None):
    """Drive the example."""
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue,
                                allocator=cl_tools.MemoryPool(
                                    cl_tools.ImmediateAllocator(queue)))

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    num_parts = comm.Get_size()

    if restart_step is None:

        from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
        mesh_dist = MPIMeshDistributor(comm)

        dim = 2
        nel_1d = 16

        if mesh_dist.is_mananger_rank():
            from meshmode.mesh.generation import generate_regular_rect_mesh
            mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                              b=(0.5, ) * dim,
                                              nelements_per_axis=(nel_1d, ) *
                                              dim)

            print("%d elements" % mesh.nelements)
            part_per_element = get_partition_by_pymetis(mesh, num_parts)
            local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                                   num_parts)

            del mesh

        else:
            local_mesh = mesh_dist.receive_mesh_part()

        fields = None

    else:
        from mirgecom.restart import read_restart_data
        restart_data = read_restart_data(
            actx, snapshot_pattern.format(step=restart_step, rank=rank))
        local_mesh = restart_data["local_mesh"]
        nel_1d = restart_data["nel_1d"]
        assert comm.Get_size() == restart_data["num_parts"]

    order = 3

    discr = EagerDGDiscretization(actx,
                                  local_mesh,
                                  order=order,
                                  mpi_communicator=comm)

    if local_mesh.dim == 2:
        # no deep meaning here, just a fudge factor
        dt = 0.7 / (nel_1d * order**2)
    elif dim == 3:
        # no deep meaning here, just a fudge factor
        dt = 0.4 / (nel_1d * order**2)
    else:
        raise ValueError("don't have a stable time step guesstimate")

    t_final = 3

    if restart_step is None:
        t = 0
        istep = 0

        fields = flat_obj_array(bump(actx, discr),
                                [discr.zeros(actx) for i in range(discr.dim)])

    else:
        t = restart_data["t"]
        istep = restart_step
        assert istep == restart_step
        restart_fields = restart_data["fields"]
        old_order = restart_data["order"]
        if old_order != order:
            old_discr = EagerDGDiscretization(actx,
                                              local_mesh,
                                              order=old_order,
                                              mpi_communicator=comm)
            from meshmode.discretization.connection import make_same_mesh_connection
            connection = make_same_mesh_connection(
                actx, discr.discr_from_dd("vol"),
                old_discr.discr_from_dd("vol"))
            fields = connection(restart_fields)
        else:
            fields = restart_fields

    vis = make_visualizer(discr)

    def rhs(t, w):
        return wave_operator(discr, c=1, w=w)

    while t < t_final:
        # restart must happen at beginning of step
        if istep % 100 == 0 and (
                # Do not overwrite the restart file that we just read.
                istep != restart_step):
            from mirgecom.restart import write_restart_file
            write_restart_file(actx,
                               restart_data={
                                   "local_mesh": local_mesh,
                                   "order": order,
                                   "fields": fields,
                                   "t": t,
                                   "step": istep,
                                   "nel_1d": nel_1d,
                                   "num_parts": num_parts
                               },
                               filename=snapshot_pattern.format(step=istep,
                                                                rank=rank),
                               comm=comm)

        if istep % 10 == 0:
            print(istep, t, discr.norm(fields[0]))
            vis.write_parallel_vtk_file(
                comm, "fld-wave-eager-mpi-%03d-%04d.vtu" % (rank, istep), [
                    ("u", fields[0]),
                    ("v", fields[1:]),
                ])

        fields = rk4_step(fields, t, dt, rhs)

        t += dt
        istep += 1
Пример #14
0
def _test_mpi_boundary_swap(dim, order, num_groups):
    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper

    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    i_local_part = mpi_comm.Get_rank()
    num_parts = mpi_comm.Get_size()

    mesh_dist = MPIMeshDistributor(mpi_comm)

    if mesh_dist.is_mananger_rank():
        np.random.seed(42)
        from meshmode.mesh.generation import generate_warped_rect_mesh
        meshes = [generate_warped_rect_mesh(dim, order=order, nelements_side=4)
                        for _ in range(num_groups)]

        if num_groups > 1:
            from meshmode.mesh.processing import merge_disjoint_meshes
            mesh = merge_disjoint_meshes(meshes)
        else:
            mesh = meshes[0]

        part_per_element = np.random.randint(num_parts, size=mesh.nelements)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    group_factory = PolynomialWarpAndBlendGroupFactory(order)

    from arraycontext import PyOpenCLArrayContext
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.discretization import Discretization
    vol_discr = Discretization(actx, local_mesh, group_factory)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(local_mesh)

    # Check that the connectivity makes sense before doing any communication
    _test_connected_parts(mpi_comm, connected_parts)

    from meshmode.discretization.connection import make_face_restriction
    from meshmode.mesh import BTAG_PARTITION
    local_bdry_conns = {}
    for i_remote_part in connected_parts:
        local_bdry_conns[i_remote_part] = make_face_restriction(
                actx, vol_discr, group_factory, BTAG_PARTITION(i_remote_part))

    remote_to_local_bdry_conns = {}
    with MPIBoundaryCommSetupHelper(mpi_comm, actx, local_bdry_conns,
            bdry_grp_factory=group_factory) as bdry_setup_helper:
        from meshmode.discretization.connection import check_connection
        while True:
            conns = bdry_setup_helper.complete_some()
            if not conns:
                break
            for i_remote_part, conn in conns.items():
                check_connection(actx, conn)
                remote_to_local_bdry_conns[i_remote_part] = conn

    _test_data_transfer(mpi_comm,
                        actx,
                        local_bdry_conns,
                        remote_to_local_bdry_conns,
                        connected_parts)

    logger.debug("Rank %d exiting", i_local_part)
Пример #15
0
def main(ctx_factory, dim=2, order=4, visualize=False):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          nelements_per_axis=(16, ) * dim)

        logger.info("%d elements", mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx,
                                     local_mesh,
                                     order=order,
                                     mpi_communicator=comm)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)])
        return (np.sin(source_omega * t) * actx.np.exp(
            -np.dot(source_center_dist, source_center_dist) / source_width**2))

    from grudge.models.wave import WeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = WeakWaveOperator(dcoll,
                               0.1,
                               source_f=source_f,
                               dirichlet_tag=BTAG_NONE,
                               neumann_tag=BTAG_NONE,
                               radiation_tag=BTAG_ALL,
                               flux_type="upwind")

    fields = flat_obj_array(dcoll.zeros(actx),
                            [dcoll.zeros(actx) for i in range(dcoll.dim)])

    dt = 2 / 3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields)

    wave_op.check_bc_coverage(local_mesh)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 10
    nsteps = int(final_t / dt) + 1

    if comm.rank == 0:
        logger.info("dt=%g nsteps=%d", dt, nsteps)

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(dcoll)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    if visualize:
        u = fields[0]
        v = fields[1:]
        vis.write_parallel_vtk_file(
            comm, f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu", [
                ("u", u),
                ("v", v),
            ])

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            l2norm = norm(u=event.state_component[0])

            if step % 10 == 0:
                if comm.rank == 0:
                    logger.info(f"step: {step} "
                                f"t: {time()-t_last_step} "
                                f"L2: {l2norm}")
                if visualize:
                    vis.write_parallel_vtk_file(
                        comm, f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu",
                        [
                            ("u", event.state_component[0]),
                            ("v", event.state_component[1:]),
                        ])
            t_last_step = time()

            # NOTE: These are here to ensure the solution is bounded for the
            # time interval specified
            assert l2norm < 1
Пример #16
0
def _test_mpi_boundary_swap(dim, order, num_groups):
    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper

    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    i_local_part = mpi_comm.Get_rank()
    num_parts = mpi_comm.Get_size()

    mesh_dist = MPIMeshDistributor(mpi_comm)

    if mesh_dist.is_mananger_rank():
        np.random.seed(42)
        from meshmode.mesh.generation import generate_warped_rect_mesh
        meshes = [
            generate_warped_rect_mesh(dim, order=order, n=4)
            for _ in range(num_groups)
        ]

        if num_groups > 1:
            from meshmode.mesh.processing import merge_disjoint_meshes
            mesh = merge_disjoint_meshes(meshes)
        else:
            mesh = meshes[0]

        part_per_element = np.random.randint(num_parts, size=mesh.nelements)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    group_factory = PolynomialWarpAndBlendGroupFactory(order)

    from meshmode.array_context import PyOpenCLArrayContext
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.discretization import Discretization
    vol_discr = Discretization(actx, local_mesh, group_factory)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(local_mesh)
    assert i_local_part not in connected_parts
    bdry_setup_helpers = {}
    local_bdry_conns = {}

    from meshmode.discretization.connection import make_face_restriction
    from meshmode.mesh import BTAG_PARTITION
    for i_remote_part in connected_parts:
        local_bdry_conns[i_remote_part] = make_face_restriction(
            actx, vol_discr, group_factory, BTAG_PARTITION(i_remote_part))

        setup_helper = bdry_setup_helpers[i_remote_part] = \
                MPIBoundaryCommSetupHelper(
                        mpi_comm, actx, local_bdry_conns[i_remote_part],
                        i_remote_part, bdry_grp_factory=group_factory)

        setup_helper.post_sends()

    remote_to_local_bdry_conns = {}
    from meshmode.discretization.connection import check_connection
    while bdry_setup_helpers:
        for i_remote_part, setup_helper in bdry_setup_helpers.items():
            if setup_helper.is_setup_ready():
                assert bdry_setup_helpers.pop(i_remote_part) is setup_helper
                conn = setup_helper.complete_setup()
                check_connection(actx, conn)
                remote_to_local_bdry_conns[i_remote_part] = conn
                break

        # FIXME: Not ideal, busy-waits

    _test_data_transfer(mpi_comm, actx, local_bdry_conns,
                        remote_to_local_bdry_conns, connected_parts)

    logger.debug("Rank %d exiting", i_local_part)
Пример #17
0
def main(ctx_factory=cl.create_some_context, use_logmgr=True,
         use_leap=False, use_profiling=False, casename=None,
         rst_filename=None, actx_class=PyOpenCLArrayContext):
    """Run the example."""
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    logmgr = initialize_logmgr(use_logmgr,
        filename="heat-source.sqlite", mode="wu", mpi_comm=comm)

    if use_profiling:
        queue = cl.CommandQueue(
            cl_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    else:
        queue = cl.CommandQueue(cl_ctx)

    actx = actx_class(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)))

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    nel_1d = 16

    t = 0
    t_final = 0.0002
    istep = 0

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(
            a=(-0.5,)*dim,
            b=(0.5,)*dim,
            nelements_per_axis=(nel_1d,)*dim,
            boundary_tag_to_face={
                "dirichlet": ["+x", "-x"],
                "neumann": ["+y", "-y"]
                }
            )

        print("%d elements" % mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    order = 3

    discr = EagerDGDiscretization(actx, local_mesh, order=order,
                    mpi_communicator=comm)

    if dim == 2:
        # no deep meaning here, just a fudge factor
        dt = 0.0025/(nel_1d*order**2)
    else:
        raise ValueError("don't have a stable time step guesstimate")

    source_width = 0.2

    from arraycontext import thaw
    nodes = thaw(discr.nodes(), actx)

    boundaries = {
        DTAG_BOUNDARY("dirichlet"): DirichletDiffusionBoundary(0.),
        DTAG_BOUNDARY("neumann"): NeumannDiffusionBoundary(0.)
    }

    u = discr.zeros(actx)

    if logmgr:
        logmgr_add_device_name(logmgr, queue)
        logmgr_add_device_memory_usage(logmgr, queue)

        logmgr.add_watches(["step.max", "t_step.max", "t_log.max"])

        try:
            logmgr.add_watches(["memory_usage_python.max", "memory_usage_gpu.max"])
        except KeyError:
            pass

        if use_profiling:
            logmgr.add_watches(["multiply_time.max"])

        vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
        logmgr.add_quantity(vis_timer)

    vis = make_visualizer(discr)

    def rhs(t, u):
        return (
            diffusion_operator(
                discr, quad_tag=DISCR_TAG_BASE,
                alpha=1, boundaries=boundaries, u=u)
            + actx.np.exp(-np.dot(nodes, nodes)/source_width**2))

    compiled_rhs = actx.compile(rhs)

    rank = comm.Get_rank()

    while t < t_final:
        if logmgr:
            logmgr.tick_before()

        if istep % 10 == 0:
            print(istep, t, actx.to_numpy(actx.np.linalg.norm(u[0])))
            vis.write_vtk_file("fld-heat-source-mpi-%03d-%04d.vtu" % (rank, istep),
                    [
                        ("u", u)
                        ], overwrite=True)

        u = rk4_step(u, t, dt, compiled_rhs)
        t += dt
        istep += 1

        if logmgr:
            set_dt(logmgr, dt)
            logmgr.tick_after()
    final_answer = discr.norm(u, np.inf)
    resid = abs(final_answer - 0.00020620711665201585)
    if resid > 1e-15:
        raise ValueError(f"Run did not produce the expected result {resid=}")
Пример #18
0
def main(snapshot_pattern="wave-mpi-{step:04d}-{rank:04d}.pkl", restart_step=None,
         use_profiling=False, use_logmgr=False, actx_class=PyOpenCLArrayContext):
    """Drive the example."""
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    num_parts = comm.Get_size()

    logmgr = initialize_logmgr(use_logmgr,
        filename="wave-mpi.sqlite", mode="wu", mpi_comm=comm)
    if use_profiling:
        queue = cl.CommandQueue(cl_ctx,
            properties=cl.command_queue_properties.PROFILING_ENABLE)
        actx = actx_class(queue,
            allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
            logmgr=logmgr)
    else:
        queue = cl.CommandQueue(cl_ctx)
        actx = actx_class(queue,
            allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)))

    if restart_step is None:

        from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
        mesh_dist = MPIMeshDistributor(comm)

        dim = 2
        nel_1d = 16

        if mesh_dist.is_mananger_rank():
            from meshmode.mesh.generation import generate_regular_rect_mesh
            mesh = generate_regular_rect_mesh(
                a=(-0.5,)*dim, b=(0.5,)*dim,
                nelements_per_axis=(nel_1d,)*dim)

            print("%d elements" % mesh.nelements)
            part_per_element = get_partition_by_pymetis(mesh, num_parts)
            local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

            del mesh

        else:
            local_mesh = mesh_dist.receive_mesh_part()

        fields = None

    else:
        from mirgecom.restart import read_restart_data
        restart_data = read_restart_data(
            actx, snapshot_pattern.format(step=restart_step, rank=rank)
        )
        local_mesh = restart_data["local_mesh"]
        nel_1d = restart_data["nel_1d"]
        assert comm.Get_size() == restart_data["num_parts"]

    order = 3

    discr = EagerDGDiscretization(actx, local_mesh, order=order,
                                  mpi_communicator=comm)

    current_cfl = 0.485
    wave_speed = 1.0
    from grudge.dt_utils import characteristic_lengthscales
    dt = current_cfl * characteristic_lengthscales(actx, discr) / wave_speed

    from grudge.op import nodal_min
    dt = nodal_min(discr, "vol", dt)

    t_final = 1

    if restart_step is None:
        t = 0
        istep = 0

        fields = flat_obj_array(
            bump(actx, discr),
            [discr.zeros(actx) for i in range(discr.dim)]
            )

    else:
        t = restart_data["t"]
        istep = restart_step
        assert istep == restart_step
        restart_fields = restart_data["fields"]
        old_order = restart_data["order"]
        if old_order != order:
            old_discr = EagerDGDiscretization(actx, local_mesh, order=old_order,
                                              mpi_communicator=comm)
            from meshmode.discretization.connection import make_same_mesh_connection
            connection = make_same_mesh_connection(actx, discr.discr_from_dd("vol"),
                                                   old_discr.discr_from_dd("vol"))
            fields = connection(restart_fields)
        else:
            fields = restart_fields

    if logmgr:
        logmgr_add_cl_device_info(logmgr, queue)
        logmgr_add_device_memory_usage(logmgr, queue)

        logmgr.add_watches(["step.max", "t_step.max", "t_log.max"])

        try:
            logmgr.add_watches(["memory_usage_python.max", "memory_usage_gpu.max"])
        except KeyError:
            pass

        if use_profiling:
            logmgr.add_watches(["multiply_time.max"])

        vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
        logmgr.add_quantity(vis_timer)

    vis = make_visualizer(discr)

    def rhs(t, w):
        return wave_operator(discr, c=wave_speed, w=w)

    compiled_rhs = actx.compile(rhs)

    while t < t_final:
        if logmgr:
            logmgr.tick_before()

        # restart must happen at beginning of step
        if istep % 100 == 0 and (
                # Do not overwrite the restart file that we just read.
                istep != restart_step):
            from mirgecom.restart import write_restart_file
            write_restart_file(
                actx, restart_data={
                    "local_mesh": local_mesh,
                    "order": order,
                    "fields": fields,
                    "t": t,
                    "step": istep,
                    "nel_1d": nel_1d,
                    "num_parts": num_parts},
                filename=snapshot_pattern.format(step=istep, rank=rank),
                comm=comm
            )

        if istep % 10 == 0:
            print(istep, t, discr.norm(fields[0]))
            vis.write_parallel_vtk_file(
                comm,
                "fld-wave-mpi-%03d-%04d.vtu" % (rank, istep),
                [
                    ("u", fields[0]),
                    ("v", fields[1:]),
                ], overwrite=True
            )

        fields = thaw(freeze(fields, actx), actx)
        fields = rk4_step(fields, t, dt, compiled_rhs)

        t += dt
        istep += 1

        if logmgr:
            set_dt(logmgr, dt)
            logmgr.tick_after()

    final_soln = discr.norm(fields[0])
    assert np.abs(final_soln - 0.04409852463947439) < 1e-14