Example #1
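The snippet below is excerpted from a larger example file, so its module-level preamble is not shown. A rough sketch of what it assumes is given here; module paths vary across grudge/meshmode versions, and helpers such as bump, wave_operator, estimate_rk4_timestep and rk4_step are defined or imported elsewhere in that file. (Note that is_mananger_rank really is the method's spelling in meshmode.)

import logging

import numpy as np
import pyopencl as cl
import pyopencl.tools as cl_tools
from mpi4py import MPI
from pytools.obj_array import flat_obj_array

import grudge.op as op
from grudge.discretization import DiscretizationCollection
from grudge.shortcuts import make_visualizer
from meshmode.array_context import PyOpenCLArrayContext  # best-guess path

logger = logging.getLogger(__name__)
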
def main(ctx_factory, dim=2, order=3, visualize=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    nel_1d = 16

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          nelements_per_axis=(nel_1d, ) * dim)

        logger.info("%d elements", mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx,
                                     local_mesh,
                                     order=order,
                                     mpi_communicator=comm)

    fields = flat_obj_array(bump(actx, dcoll),
                            [dcoll.zeros(actx) for i in range(dcoll.dim)])

    c = 1
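    # Timestep: an RK4 stability estimate scaled by a 0.45 safety factor.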
    dt = 0.45 * estimate_rk4_timestep(actx, dcoll, c)

    vis = make_visualizer(dcoll)

    def rhs(t, w):
        return wave_operator(dcoll, c=c, w=w)

    if comm.rank == 0:
        logger.info("dt = %g", dt)

    t = 0
    t_final = 3
    istep = 0
    while t < t_final:
        fields = rk4_step(fields, t, dt, rhs)

        l2norm = op.norm(dcoll, fields[0], 2)

        if istep % 10 == 0:
            linfnorm = op.norm(dcoll, fields[0], np.inf)
            nodalmax = op.nodal_max(dcoll, "vol", fields[0])
            nodalmin = op.nodal_min(dcoll, "vol", fields[0])
            if comm.rank == 0:
                logger.info(f"step: {istep} t: {t} "
                            f"L2: {l2norm} "
                            f"Linf: {linfnorm} "
                            f"sol max: {nodalmax} "
                            f"sol min: {nodalmin}")
            if visualize:
                vis.write_parallel_vtk_file(
                    comm, f"fld-wave-eager-mpi-{{rank:03d}}-{istep:04d}.vtu", [
                        ("u", fields[0]),
                        ("v", fields[1:]),
                    ])

        t += dt
        istep += 1

        # NOTE: This assertion is here to ensure the solution stays bounded
        # over the time interval specified
        assert l2norm < 1
Example #2
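This example uses the older, symbolic grudge interface (sym/bind). Its file-level imports are again omitted; a rough sketch, with module paths guessed for those legacy releases, is:

import numpy as np
import pyopencl as cl

# Legacy symbolic interface; exact locations differ between grudge releases.
from grudge import sym, bind, DiscretizationCollection
from grudge.shortcuts import set_up_rk4
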
def simple_wave_entrypoint(dim=2,
                           num_elems=256,
                           order=4,
                           num_steps=30,
                           log_filename="grudge.dat"):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()
    n = int(num_elems**(1. / dim))

    from meshmode.distributed import MPIMeshDistributor
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim,
                                          b=(0.5, ) * dim,
                                          n=(n, ) * dim)

        from pymetis import part_graph
        _, p = part_graph(num_parts,
                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
        part_per_element = np.array(p)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    vol_discr = DiscretizationCollection(cl_ctx,
                                         local_mesh,
                                         order=order,
                                         mpi_communicator=comm)

    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
    source_width = 0.05
    source_omega = 3

    sym_x = sym.nodes(local_mesh.dim)
    sym_source_center_dist = sym_x - source_center
    sym_t = sym.ScalarVariable("t")

    from grudge.models.wave import StrongWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE
    op = StrongWaveOperator(
        -0.1,
        vol_discr.dim,
        source_f=(
            sym.sin(source_omega * sym_t) *
            sym.exp(-np.dot(sym_source_center_dist, sym_source_center_dist) /
                    source_width**2)),
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind")

    from pytools.obj_array import join_fields
    fields = join_fields(
        vol_discr.zeros(queue),
        [vol_discr.zeros(queue) for i in range(vol_discr.dim)])

    from logpyle import (LogManager, add_general_quantities, add_run_info,
                         IntervalTimer, EventCounter)
    # NOTE: LogManager hangs when using a file on a shared directory.
    logmgr = LogManager(log_filename, "w", comm)
    add_run_info(logmgr)
    add_general_quantities(logmgr)
    log_quantities = {
        "rank_data_swap_timer": IntervalTimer(
            "rank_data_swap_timer",
            "Time spent evaluating RankDataSwapAssign"),
        "rank_data_swap_counter": EventCounter(
            "rank_data_swap_counter",
            "Number of RankDataSwapAssign instructions evaluated"),
        "exec_timer": IntervalTimer(
            "exec_timer", "Total time spent executing instructions"),
        "insn_eval_timer": IntervalTimer(
            "insn_eval_timer", "Time spent evaluating instructions"),
        "future_eval_timer": IntervalTimer(
            "future_eval_timer", "Time spent evaluating futures"),
        "busy_wait_timer": IntervalTimer(
            "busy_wait_timer", "Time wasted doing busy wait"),
    }
    for quantity in log_quantities.values():
        logmgr.add_quantity(quantity)

    bound_op = bind(vol_discr, op.sym_operator())
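    # bind() compiles the symbolic wave operator for this discretization. When
    # called with profile_data, the bound operator returns the result together
    # with updated profiling data, which rhs keeps on the function object.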

    def rhs(t, w):
        val, rhs.profile_data = bound_op(queue,
                                         profile_data=rhs.profile_data,
                                         log_quantities=log_quantities,
                                         t=t,
                                         w=w)
        return val

    rhs.profile_data = {}

    dt = 0.04
    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    logmgr.tick_before()
    for event in dt_stepper.run(t_end=dt * num_steps):
        if isinstance(event, dt_stepper.StateComputed):
            logmgr.tick_after()
            logmgr.tick_before()
    logmgr.tick_after()

    def print_profile_data(data):
        print("""execute() for rank %d:
            \tInstruction Evaluation: %f%%
            \tFuture Evaluation: %f%%
            \tBusy Wait: %f%%
            \tTotal: %f seconds""" %
              (comm.Get_rank(), data['insn_eval_time'] / data['total_time'] *
               100, data['future_eval_time'] / data['total_time'] * 100,
               data['busy_wait_time'] / data['total_time'] * 100,
               data['total_time']))

    print_profile_data(rhs.profile_data)
    logmgr.close()
Example #3
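As before, the file-level imports are omitted. Given the names used below, a plausible preamble (module paths are a guess, and thaw has moved between meshmode and arraycontext over time) is:

import logging

import numpy as np
import pyopencl as cl
import pyopencl.tools as cl_tools
from mpi4py import MPI
from pytools.obj_array import flat_obj_array
from arraycontext import thaw

import grudge.op as op
from grudge.discretization import DiscretizationCollection
from grudge.shortcuts import set_up_rk4
from meshmode.array_context import PyOpenCLArrayContext  # best-guess path

logger = logging.getLogger(__name__)
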
def main(ctx_factory, dim=2, order=4, visualize=False):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(
                a=(-0.5,)*dim,
                b=(0.5,)*dim,
                nelements_per_axis=(16,)*dim)

        logger.info("%d elements", mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=order,
            mpi_communicator=comm)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)]
        )
        return (
            np.sin(source_omega*t)
            * actx.np.exp(
                -np.dot(source_center_dist, source_center_dist)
                / source_width**2
            )
        )

    from grudge.models.wave import WeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = WeakWaveOperator(
        dcoll,
        0.1,
        source_f=source_f,
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind"
    )

    fields = flat_obj_array(
        dcoll.zeros(actx),
        [dcoll.zeros(actx) for i in range(dcoll.dim)]
    )

    dt = actx.to_numpy(
        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))

    wave_op.check_bc_coverage(local_mesh)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 10
    nsteps = int(final_t/dt) + 1

    if comm.rank == 0:
        logger.info("dt=%g nsteps=%d", dt, nsteps)

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(dcoll)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    if visualize:
        u = fields[0]
        v = fields[1:]
        vis.write_parallel_vtk_file(
            comm,
            f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu",
            [
                ("u", u),
                ("v", v),
            ]
        )

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            l2norm = norm(u=event.state_component[0])

            if step % 10 == 0:
                if comm.rank == 0:
                    logger.info(f"step: {step} "
                                f"t: {time()-t_last_step} "
                                f"L2: {l2norm}")
                if visualize:
                    vis.write_parallel_vtk_file(
                        comm,
                        f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu",
                        [
                            ("u", event.state_component[0]),
                            ("v", event.state_component[1:]),
                        ]
                    )
            t_last_step = time()

            # NOTE: This assertion is here to ensure the solution stays bounded
            # over the time interval specified
            assert l2norm < 1
Example #4
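This mirgecom driver likewise omits its preamble. Sketching it from the names used below (module paths are a best guess, and bump is defined elsewhere in the example file):

import numpy as np
import pyopencl as cl
import pyopencl.tools as cl_tools
from pytools.obj_array import flat_obj_array
from arraycontext import thaw, freeze
from logpyle import IntervalTimer, set_dt

from grudge.eager import EagerDGDiscretization
from grudge.shortcuts import make_visualizer
from meshmode.array_context import PyOpenCLArrayContext  # best-guess path

from mirgecom.wave import wave_operator
from mirgecom.integrators import rk4_step
from mirgecom.logging_quantities import (initialize_logmgr,
                                          logmgr_add_cl_device_info,
                                          logmgr_add_device_memory_usage)
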
def main(snapshot_pattern="wave-mpi-{step:04d}-{rank:04d}.pkl",
         restart_step=None,
         use_profiling=False,
         use_logmgr=False,
         actx_class=PyOpenCLArrayContext):
    """Drive the example."""
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    num_parts = comm.Get_size()

    logmgr = initialize_logmgr(use_logmgr,
                               filename="wave-mpi.sqlite",
                               mode="wu",
                               mpi_comm=comm)
    if use_profiling:
        queue = cl.CommandQueue(
            cl_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
        actx = actx_class(queue,
                          allocator=cl_tools.MemoryPool(
                              cl_tools.ImmediateAllocator(queue)),
                          logmgr=logmgr)
    else:
        queue = cl.CommandQueue(cl_ctx)
        actx = actx_class(queue,
                          allocator=cl_tools.MemoryPool(
                              cl_tools.ImmediateAllocator(queue)))

    if restart_step is None:

        from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
        mesh_dist = MPIMeshDistributor(comm)

        dim = 2
        nel_1d = 16

        if mesh_dist.is_mananger_rank():
            from meshmode.mesh.generation import generate_regular_rect_mesh
            mesh = generate_regular_rect_mesh(
                a=(-0.5,)*dim, b=(0.5,)*dim,
                nelements_per_axis=(nel_1d,)*dim)

            print("%d elements" % mesh.nelements)
            part_per_element = get_partition_by_pymetis(mesh, num_parts)
            local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                                   num_parts)

            del mesh

        else:
            local_mesh = mesh_dist.receive_mesh_part()

        fields = None

    else:
        from mirgecom.restart import read_restart_data
        restart_data = read_restart_data(
            actx, snapshot_pattern.format(step=restart_step, rank=rank))
        local_mesh = restart_data["local_mesh"]
        nel_1d = restart_data["nel_1d"]
        assert comm.Get_size() == restart_data["num_parts"]

    order = 3

    discr = EagerDGDiscretization(actx,
                                  local_mesh,
                                  order=order,
                                  mpi_communicator=comm)

    current_cfl = 0.485
    wave_speed = 1.0
    from grudge.dt_utils import characteristic_lengthscales
    nodal_dt = characteristic_lengthscales(actx, discr) / wave_speed

    from grudge.op import nodal_min
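    # CFL-scaled global minimum of the per-node timestep estimate; nodal_min
    # includes an MPI reduction, so all ranks agree on dt.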
    dt = actx.to_numpy(current_cfl * nodal_min(discr, "vol", nodal_dt))[()]

    t_final = 1

    if restart_step is None:
        t = 0
        istep = 0

        fields = flat_obj_array(bump(actx, discr),
                                [discr.zeros(actx) for i in range(discr.dim)])

    else:
        t = restart_data["t"]
        istep = restart_step
        assert istep == restart_step
        restart_fields = restart_data["fields"]
        old_order = restart_data["order"]
        if old_order != order:
            old_discr = EagerDGDiscretization(actx,
                                              local_mesh,
                                              order=old_order,
                                              mpi_communicator=comm)
            from meshmode.discretization.connection import make_same_mesh_connection
            connection = make_same_mesh_connection(
                actx, discr.discr_from_dd("vol"),
                old_discr.discr_from_dd("vol"))
            fields = connection(restart_fields)
        else:
            fields = restart_fields

    if logmgr:
        logmgr_add_cl_device_info(logmgr, queue)
        logmgr_add_device_memory_usage(logmgr, queue)

        logmgr.add_watches(["step.max", "t_step.max", "t_log.max"])

        try:
            logmgr.add_watches(
                ["memory_usage_python.max", "memory_usage_gpu.max"])
        except KeyError:
            pass

        if use_profiling:
            logmgr.add_watches(["multiply_time.max"])

        vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
        logmgr.add_quantity(vis_timer)

    vis = make_visualizer(discr)

    def rhs(t, w):
        return wave_operator(discr, c=wave_speed, w=w)

    compiled_rhs = actx.compile(rhs)

    while t < t_final:
        if logmgr:
            logmgr.tick_before()

        # restart must happen at beginning of step
        if istep % 100 == 0 and (
                # Do not overwrite the restart file that we just read.
                istep != restart_step):
            from mirgecom.restart import write_restart_file
            write_restart_file(actx,
                               restart_data={
                                   "local_mesh": local_mesh,
                                   "order": order,
                                   "fields": fields,
                                   "t": t,
                                   "step": istep,
                                   "nel_1d": nel_1d,
                                   "num_parts": num_parts
                               },
                               filename=snapshot_pattern.format(step=istep,
                                                                rank=rank),
                               comm=comm)

        if istep % 10 == 0:
            print(istep, t, actx.to_numpy(discr.norm(fields[0])))
            vis.write_parallel_vtk_file(comm,
                                        "fld-wave-mpi-%03d-%04d.vtu" %
                                        (rank, istep), [
                                            ("u", fields[0]),
                                            ("v", fields[1:]),
                                        ],
                                        overwrite=True)

        fields = thaw(freeze(fields, actx), actx)
        fields = rk4_step(fields, t, dt, compiled_rhs)

        t += dt
        istep += 1

        if logmgr:
            set_dt(logmgr, dt)
            logmgr.tick_after()

    final_soln = actx.to_numpy(discr.norm(fields[0]))
    assert np.abs(final_soln - 0.04409852463947439) < 1e-14
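
The helper below comes from a meshmode-style MPI boundary-exchange test rather than a wave driver. Roughly, it assumes the following surroundings, with _test_data_transfer defined alongside it (the group-factory module path is a guess):

import logging

import numpy as np
import pyopencl as cl
from meshmode.discretization.poly_element import PolynomialWarpAndBlendGroupFactory

logger = logging.getLogger(__name__)
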
def _test_mpi_boundary_swap(dim, order, num_groups):
    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper

    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    i_local_part = mpi_comm.Get_rank()
    num_parts = mpi_comm.Get_size()

    mesh_dist = MPIMeshDistributor(mpi_comm)

    if mesh_dist.is_mananger_rank():
        np.random.seed(42)
        from meshmode.mesh.generation import generate_warped_rect_mesh
        meshes = [
            generate_warped_rect_mesh(dim, order=order, n=4)
            for _ in range(num_groups)
        ]

        if num_groups > 1:
            from meshmode.mesh.processing import merge_disjoint_meshes
            mesh = merge_disjoint_meshes(meshes)
        else:
            mesh = meshes[0]

        part_per_element = np.random.randint(num_parts, size=mesh.nelements)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element,
                                               num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    group_factory = PolynomialWarpAndBlendGroupFactory(order)

    from meshmode.array_context import PyOpenCLArrayContext
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.discretization import Discretization
    vol_discr = Discretization(actx, local_mesh, group_factory)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(local_mesh)
    assert i_local_part not in connected_parts
    bdry_setup_helpers = {}
    local_bdry_conns = {}

    from meshmode.discretization.connection import make_face_restriction
    from meshmode.mesh import BTAG_PARTITION
    for i_remote_part in connected_parts:
        local_bdry_conns[i_remote_part] = make_face_restriction(
            actx, vol_discr, group_factory, BTAG_PARTITION(i_remote_part))

        setup_helper = bdry_setup_helpers[i_remote_part] = \
                MPIBoundaryCommSetupHelper(
                        mpi_comm, actx, local_bdry_conns[i_remote_part],
                        i_remote_part, bdry_grp_factory=group_factory)

        setup_helper.post_sends()

    remote_to_local_bdry_conns = {}
    from meshmode.discretization.connection import check_connection
    while bdry_setup_helpers:
        for i_remote_part, setup_helper in bdry_setup_helpers.items():
            if setup_helper.is_setup_ready():
                assert bdry_setup_helpers.pop(i_remote_part) is setup_helper
                conn = setup_helper.complete_setup()
                check_connection(actx, conn)
                remote_to_local_bdry_conns[i_remote_part] = conn
                break

        # FIXME: Not ideal, busy-waits

    _test_data_transfer(mpi_comm, actx, local_bdry_conns,
                        remote_to_local_bdry_conns, connected_parts)

    logger.debug("Rank %d exiting", i_local_part)