def mpi_communication_entrypoint(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) actx = PyOpenCLArrayContext(queue) from mpi4py import MPI comm = MPI.COMM_WORLD i_local_rank = comm.Get_rank() num_parts = comm.Get_size() from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis mesh_dist = MPIMeshDistributor(comm) dim = 2 dt = 0.04 order = 4 if mesh_dist.is_mananger_rank(): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim, b=(0.5, ) * dim, nelements_per_axis=(16, ) * dim) part_per_element = get_partition_by_pymetis(mesh, num_parts) local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) else: local_mesh = mesh_dist.receive_mesh_part() vol_discr = DiscretizationCollection(actx, local_mesh, order=order, mpi_communicator=comm) source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim] source_width = 0.05 source_omega = 3 sym_x = sym.nodes(local_mesh.dim) sym_source_center_dist = sym_x - source_center sym_t = sym.ScalarVariable("t") from grudge.models.wave import WeakWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE op = WeakWaveOperator( 0.1, vol_discr.dim, source_f=( sym.sin(source_omega * sym_t) * sym.exp(-np.dot(sym_source_center_dist, sym_source_center_dist) / source_width**2)), dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind") from pytools.obj_array import flat_obj_array fields = flat_obj_array( vol_discr.zeros(actx), [vol_discr.zeros(actx) for i in range(vol_discr.dim)]) # FIXME # dt = op.estimate_rk4_timestep(vol_discr, fields=fields) # FIXME: Should meshmode consider BTAG_PARTITION to be a boundary? # Fails because: "found faces without boundary conditions" # op.check_bc_coverage(local_mesh) from logpyle import LogManager, \ add_general_quantities, \ add_run_info, \ IntervalTimer, EventCounter log_filename = None # NOTE: LogManager hangs when using a file on a shared directory. # log_filename = "grudge_log.dat" logmgr = LogManager(log_filename, "w", comm) add_run_info(logmgr) add_general_quantities(logmgr) log_quantities =\ {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer", "Time spent evaluating RankDataSwapAssign"), "rank_data_swap_counter": EventCounter("rank_data_swap_counter", "Number of RankDataSwapAssign instructions evaluated"), "exec_timer": IntervalTimer("exec_timer", "Total time spent executing instructions"), "insn_eval_timer": IntervalTimer("insn_eval_timer", "Time spend evaluating instructions"), "future_eval_timer": IntervalTimer("future_eval_timer", "Time spent evaluating futures"), "busy_wait_timer": IntervalTimer("busy_wait_timer", "Time wasted doing busy wait")} for quantity in log_quantities.values(): logmgr.add_quantity(quantity) logger.debug("\n%s", sym.pretty(op.sym_operator())) bound_op = bind(vol_discr, op.sym_operator()) def rhs(t, w): val, rhs.profile_data = bound_op(profile_data=rhs.profile_data, log_quantities=log_quantities, t=t, w=w) return val rhs.profile_data = {} dt_stepper = set_up_rk4("w", dt, fields, rhs) final_t = 4 nsteps = int(final_t / dt) logger.info("[%04d] dt %.5e nsteps %4d", i_local_rank, dt, nsteps) # from grudge.shortcuts import make_visualizer # vis = make_visualizer(vol_discr, vis_order=order) step = 0 norm = bind(vol_discr, sym.norm(2, sym.var("u"))) from time import time t_last_step = time() logmgr.tick_before() for event in dt_stepper.run(t_end=final_t): if isinstance(event, dt_stepper.StateComputed): assert event.component_id == "w" step += 1 logger.debug("[%04d] t = %.5e |u| = %.5e ellapsed %.5e", step, event.t, norm(u=event.state_component[0]), time() - t_last_step) # if step % 10 == 0: # vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step), # [("u", event.state_component[0]), # ("v", event.state_component[1:])]) t_last_step = time() logmgr.tick_after() logmgr.tick_before() logmgr.tick_after() def print_profile_data(data): logger.info( """execute() for rank %d:\n \tInstruction Evaluation: %g\n \tFuture Evaluation: %g\n \tBusy Wait: %g\n \tTotal: %g seconds""", i_local_rank, data["insn_eval_time"] / data["total_time"] * 100, data["future_eval_time"] / data["total_time"] * 100, data["busy_wait_time"] / data["total_time"] * 100, data["total_time"]) print_profile_data(rhs.profile_data) logmgr.close() logger.info("Rank %d exiting", i_local_rank)
def mpi_communication_entrypoint(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) actx = PyOpenCLArrayContext(queue, force_device_scalars=True) from mpi4py import MPI comm = MPI.COMM_WORLD i_local_rank = comm.Get_rank() num_parts = comm.Get_size() from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis mesh_dist = MPIMeshDistributor(comm) dim = 2 order = 4 if mesh_dist.is_mananger_rank(): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5,)*dim, b=(0.5,)*dim, nelements_per_axis=(16,)*dim) part_per_element = get_partition_by_pymetis(mesh, num_parts) local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) del mesh else: local_mesh = mesh_dist.receive_mesh_part() dcoll = DiscretizationCollection(actx, local_mesh, order=order, mpi_communicator=comm) def source_f(actx, dcoll, t=0): source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim] source_width = 0.05 source_omega = 3 nodes = thaw(dcoll.nodes(), actx) source_center_dist = flat_obj_array( [nodes[i] - source_center[i] for i in range(dcoll.dim)] ) return ( np.sin(source_omega*t) * actx.np.exp( -np.dot(source_center_dist, source_center_dist) / source_width**2 ) ) from grudge.models.wave import WeakWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE wave_op = WeakWaveOperator( dcoll, 0.1, source_f=source_f, dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind" ) fields = flat_obj_array( dcoll.zeros(actx), [dcoll.zeros(actx) for i in range(dcoll.dim)] ) dt = actx.to_numpy( 2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields)) wave_op.check_bc_coverage(local_mesh) from logpyle import LogManager, \ add_general_quantities, \ add_run_info log_filename = None # NOTE: LogManager hangs when using a file on a shared directory. # log_filename = "grudge_log.dat" logmgr = LogManager(log_filename, "w", comm) add_run_info(logmgr) add_general_quantities(logmgr) def rhs(t, w): return wave_op.operator(t, w) dt_stepper = set_up_rk4("w", dt, fields, rhs) final_t = 4 nsteps = int(final_t/dt) logger.info("[%04d] dt %.5e nsteps %4d", i_local_rank, dt, nsteps) step = 0 def norm(u): return op.norm(dcoll, u, 2) from time import time t_last_step = time() logmgr.tick_before() for event in dt_stepper.run(t_end=final_t): if isinstance(event, dt_stepper.StateComputed): assert event.component_id == "w" step += 1 logger.info("[%04d] t = %.5e |u| = %.5e ellapsed %.5e", step, event.t, norm(u=event.state_component[0]), time() - t_last_step) t_last_step = time() logmgr.tick_after() logmgr.tick_before() logmgr.tick_after() logmgr.close() logger.info("Rank %d exiting", i_local_rank)
def main(ctx_factory, dim=2, order=4, visualize=False): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) actx = PyOpenCLArrayContext( queue, allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)), force_device_scalars=True, ) from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim, b=(0.5, ) * dim, nelements_per_axis=(20, ) * dim) dcoll = DiscretizationCollection(actx, mesh, order=order) def source_f(actx, dcoll, t=0): source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim] source_width = 0.05 source_omega = 3 nodes = thaw(dcoll.nodes(), actx) source_center_dist = flat_obj_array( [nodes[i] - source_center[i] for i in range(dcoll.dim)]) return (np.sin(source_omega * t) * actx.np.exp( -np.dot(source_center_dist, source_center_dist) / source_width**2)) x = thaw(dcoll.nodes(), actx) ones = dcoll.zeros(actx) + 1 c = actx.np.where(np.dot(x, x) < 0.15, 0.1 * ones, 0.2 * ones) from grudge.models.wave import VariableCoefficientWeakWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE wave_op = VariableCoefficientWeakWaveOperator(dcoll, c, source_f=source_f, dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind") fields = flat_obj_array(dcoll.zeros(actx), [dcoll.zeros(actx) for i in range(dcoll.dim)]) wave_op.check_bc_coverage(mesh) def rhs(t, w): return wave_op.operator(t, w) dt = 2 / 3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields) dt_stepper = set_up_rk4("w", dt, fields, rhs) final_t = 1 nsteps = int(final_t / dt) + 1 logger.info("dt=%g nsteps=%d", dt, nsteps) from grudge.shortcuts import make_visualizer vis = make_visualizer(dcoll) step = 0 def norm(u): return op.norm(dcoll, u, 2) from time import time t_last_step = time() if visualize: u = fields[0] v = fields[1:] vis.write_vtk_file(f"fld-var-propogation-speed-{step:04d}.vtu", [ ("u", u), ("v", v), ("c", c), ]) for event in dt_stepper.run(t_end=final_t): if isinstance(event, dt_stepper.StateComputed): assert event.component_id == "w" step += 1 if step % 10 == 0: logger.info(f"step: {step} t: {time()-t_last_step} " f"L2: {norm(u=event.state_component[0])}") if visualize: vis.write_vtk_file( f"fld-var-propogation-speed-{step:04d}.vtu", [ ("u", event.state_component[0]), ("v", event.state_component[1:]), ("c", c), ]) t_last_step = time() # NOTE: These are here to ensure the solution is bounded for the # time interval specified assert norm(u=event.state_component[0]) < 1
def main(ctx_factory, dim=2, order=4, visualize=False): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) actx = PyOpenCLArrayContext( queue, allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)), force_device_scalars=True, ) comm = MPI.COMM_WORLD num_parts = comm.Get_size() from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis mesh_dist = MPIMeshDistributor(comm) if mesh_dist.is_mananger_rank(): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim, b=(0.5, ) * dim, nelements_per_axis=(16, ) * dim) logger.info("%d elements", mesh.nelements) part_per_element = get_partition_by_pymetis(mesh, num_parts) local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) del mesh else: local_mesh = mesh_dist.receive_mesh_part() dcoll = DiscretizationCollection(actx, local_mesh, order=order, mpi_communicator=comm) def source_f(actx, dcoll, t=0): source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim] source_width = 0.05 source_omega = 3 nodes = thaw(dcoll.nodes(), actx) source_center_dist = flat_obj_array( [nodes[i] - source_center[i] for i in range(dcoll.dim)]) return (np.sin(source_omega * t) * actx.np.exp( -np.dot(source_center_dist, source_center_dist) / source_width**2)) from grudge.models.wave import WeakWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE wave_op = WeakWaveOperator(dcoll, 0.1, source_f=source_f, dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind") fields = flat_obj_array(dcoll.zeros(actx), [dcoll.zeros(actx) for i in range(dcoll.dim)]) dt = 2 / 3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields) wave_op.check_bc_coverage(local_mesh) def rhs(t, w): return wave_op.operator(t, w) dt_stepper = set_up_rk4("w", dt, fields, rhs) final_t = 10 nsteps = int(final_t / dt) + 1 if comm.rank == 0: logger.info("dt=%g nsteps=%d", dt, nsteps) from grudge.shortcuts import make_visualizer vis = make_visualizer(dcoll) step = 0 def norm(u): return op.norm(dcoll, u, 2) from time import time t_last_step = time() if visualize: u = fields[0] v = fields[1:] vis.write_parallel_vtk_file( comm, f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu", [ ("u", u), ("v", v), ]) for event in dt_stepper.run(t_end=final_t): if isinstance(event, dt_stepper.StateComputed): assert event.component_id == "w" step += 1 l2norm = norm(u=event.state_component[0]) if step % 10 == 0: if comm.rank == 0: logger.info(f"step: {step} " f"t: {time()-t_last_step} " f"L2: {l2norm}") if visualize: vis.write_parallel_vtk_file( comm, f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu", [ ("u", event.state_component[0]), ("v", event.state_component[1:]), ]) t_last_step = time() # NOTE: These are here to ensure the solution is bounded for the # time interval specified assert l2norm < 1
def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) actx = PyOpenCLArrayContext(queue) dims = 2 from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh( a=(-0.5,)*dims, b=(0.5,)*dims, nelements_per_axis=(20,)*dims) discr = DiscretizationCollection(actx, mesh, order=order) source_center = np.array([0.1, 0.22, 0.33])[:mesh.dim] source_width = 0.05 source_omega = 3 sym_x = sym.nodes(mesh.dim) sym_source_center_dist = sym_x - source_center sym_t = sym.ScalarVariable("t") c = sym.If(sym.Comparison( np.dot(sym_x, sym_x), "<", 0.15), np.float32(0.1), np.float32(0.2)) from grudge.models.wave import VariableCoefficientWeakWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE op = VariableCoefficientWeakWaveOperator(c, discr.dim, source_f=( sym.sin(source_omega*sym_t) * sym.exp( -np.dot(sym_source_center_dist, sym_source_center_dist) / source_width**2)), dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind") from pytools.obj_array import flat_obj_array fields = flat_obj_array(discr.zeros(actx), [discr.zeros(actx) for i in range(discr.dim)]) op.check_bc_coverage(mesh) c_eval = bind(discr, c)(actx) bound_op = bind(discr, op.sym_operator()) def rhs(t, w): return bound_op(t=t, w=w) if mesh.dim == 2: dt = 0.04 * 0.3 elif mesh.dim == 3: dt = 0.02 * 0.1 dt_stepper = set_up_rk4("w", dt, fields, rhs) final_t = 1 nsteps = int(final_t/dt) print("dt=%g nsteps=%d" % (dt, nsteps)) from grudge.shortcuts import make_visualizer vis = make_visualizer(discr) step = 0 norm = bind(discr, sym.norm(2, sym.var("u"))) from time import time t_last_step = time() for event in dt_stepper.run(t_end=final_t): if isinstance(event, dt_stepper.StateComputed): assert event.component_id == "w" step += 1 print(step, event.t, norm(u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: vis.write_vtk_file("fld-var-propogation-speed-%04d.vtu" % step, [ ("u", event.state_component[0]), ("v", event.state_component[1:]), ("c", c_eval), ]) t_last_step = time()
def simple_wave_entrypoint(dim=2, num_elems=256, order=4, num_steps=30, log_filename="grudge.dat"): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) from mpi4py import MPI comm = MPI.COMM_WORLD num_parts = comm.Get_size() n = int(num_elems**(1. / dim)) from meshmode.distributed import MPIMeshDistributor mesh_dist = MPIMeshDistributor(comm) if mesh_dist.is_mananger_rank(): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5, ) * dim, b=(0.5, ) * dim, n=(n, ) * dim) from pymetis import part_graph _, p = part_graph(num_parts, xadj=mesh.nodal_adjacency.neighbors_starts.tolist(), adjncy=mesh.nodal_adjacency.neighbors.tolist()) part_per_element = np.array(p) local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) else: local_mesh = mesh_dist.receive_mesh_part() vol_discr = DiscretizationCollection(cl_ctx, local_mesh, order=order, mpi_communicator=comm) source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim] source_width = 0.05 source_omega = 3 sym_x = sym.nodes(local_mesh.dim) sym_source_center_dist = sym_x - source_center sym_t = sym.ScalarVariable("t") from grudge.models.wave import StrongWaveOperator from meshmode.mesh import BTAG_ALL, BTAG_NONE op = StrongWaveOperator( -0.1, vol_discr.dim, source_f=( sym.sin(source_omega * sym_t) * sym.exp(-np.dot(sym_source_center_dist, sym_source_center_dist) / source_width**2)), dirichlet_tag=BTAG_NONE, neumann_tag=BTAG_NONE, radiation_tag=BTAG_ALL, flux_type="upwind") from pytools.obj_array import join_fields fields = join_fields( vol_discr.zeros(queue), [vol_discr.zeros(queue) for i in range(vol_discr.dim)]) from logpyle import LogManager, \ add_general_quantities, \ add_run_info, \ IntervalTimer, EventCounter # NOTE: LogManager hangs when using a file on a shared directory. logmgr = LogManager(log_filename, "w", comm) add_run_info(logmgr) add_general_quantities(logmgr) log_quantities =\ {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer", "Time spent evaluating RankDataSwapAssign"), "rank_data_swap_counter": EventCounter("rank_data_swap_counter", "Number of RankDataSwapAssign instructions evaluated"), "exec_timer": IntervalTimer("exec_timer", "Total time spent executing instructions"), "insn_eval_timer": IntervalTimer("insn_eval_timer", "Time spend evaluating instructions"), "future_eval_timer": IntervalTimer("future_eval_timer", "Time spent evaluating futures"), "busy_wait_timer": IntervalTimer("busy_wait_timer", "Time wasted doing busy wait")} for quantity in log_quantities.values(): logmgr.add_quantity(quantity) bound_op = bind(vol_discr, op.sym_operator()) def rhs(t, w): val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data, log_quantities=log_quantities, t=t, w=w) return val rhs.profile_data = {} dt = 0.04 dt_stepper = set_up_rk4("w", dt, fields, rhs) logmgr.tick_before() for event in dt_stepper.run(t_end=dt * num_steps): if isinstance(event, dt_stepper.StateComputed): logmgr.tick_after() logmgr.tick_before() logmgr.tick_after() def print_profile_data(data): print("""execute() for rank %d: \tInstruction Evaluation: %f%% \tFuture Evaluation: %f%% \tBusy Wait: %f%% \tTotal: %f seconds""" % (comm.Get_rank(), data['insn_eval_time'] / data['total_time'] * 100, data['future_eval_time'] / data['total_time'] * 100, data['busy_wait_time'] / data['total_time'] * 100, data['total_time'])) print_profile_data(rhs.profile_data) logmgr.close()