from IPython.parallel import Client, interactive  # if 'interactive' is not exposed here in your IPython version, import it from IPython.parallel.util


def init():
    """Create a direct view on an MPI cluster and set up the rank mapping."""
    global view
    view = Client(profile='mpi')[:]
    view.block = True
    view.execute('from numpy import *')
    view.execute('from mpi4py import MPI')
    view.execute('import h5py as h5')
    view.execute('import os')
    view.run('ndarray/interengine.py')

    # gather the MPI rank of every engine, ordered like view.targets
    get_rank = interactive(lambda: MPI.COMM_WORLD.Get_rank())
    all_ranks = view.apply(get_rank)
    view['target2rank'] = all_ranks
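A hedged usage sketch (the helper name is ours, not from the original script): once init() has run, the same rank-gathering call can be used to find which engine id holds a given MPI rank, so data can be pushed to a specific rank.

def engine_for_rank(rank):
    # one MPI rank per engine, in the same order as view.targets
    ranks = view.apply(interactive(lambda: MPI.COMM_WORLD.Get_rank()))
    return view.targets[ranks.index(rank)]

# e.g. push an array only to the engine that is MPI rank 0:
# view.client[engine_for_rank(0)]['data'] = my_array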
import random
from optparse import OptionParser

from IPython.parallel import Client


def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1e-3)
    parser.set_defaults(tmax=1)
    parser.set_defaults(profile='default')

    parser.add_option("-n", type='int', dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-p", '--profile', type='str', dest='profile',
                      help="the cluster profile [default: 'default']")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = Client()
    view = rc.load_balanced_view()
    print(view)
    rc.block = True
    nengines = len(rc.ids)
    with rc[:].sync_imports():
        from IPython.utils.timing import time

    # the jobs should take a random time within a range
    times = [random.random() * (opts.tmax - opts.tmin) + opts.tmin
             for i in range(opts.n)]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines"
          % (opts.n, stime, nengines))
    time.sleep(1)
    start = time.time()
    amr = view.map(time.sleep, times)
    amr.get()
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))


if __name__ == '__main__':
    main()
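A possible invocation, assuming the script above is saved as task_profiler.py (the file name is hypothetical; the listing does not name it): start a cluster first, then run the profiler against it.

    ipcluster start -n 4
    python task_profiler.py -n 128 -t 0.01 -T 1.0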
from IPython.parallel import Client


def load_client():
    """Connect to the cluster and build a load-balanced view."""
    global client, view
    client = Client()
    view = client.load_balanced_view()
    client.block = False
    client[:].use_dill()  # register dill so richer closures can be shipped to the engines
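A hedged follow-up sketch, not part of the original listing: with dill registered, functions that close over local state can be mapped through the load-balanced view.

def demo():
    load_client()
    offset = 10
    # the lambda closes over 'offset'; dill serializes it for the engines
    ar = view.map(lambda x: x + offset, range(8))
    return ar.get()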
of megabytes you might saturate the network interface of a single node and
potentially its memory buffers if the messages are not consumed in a
streamed manner.

Note that the AllReduce scheme implemented with the spanning-tree strategy
requires the aggregation function to be commutative and associative. That is
not necessarily required by the naive gather / reduce / broadcast strategy,
where you can reorder the partial data before performing the reduce.
"""
from __future__ import print_function

from IPython.parallel import Client, Reference

# connect client and create views
rc = Client()
rc.block = True
ids = rc.ids

root_id = ids[0]
root = rc[root_id]

view = rc[:]

# run bintree.py script defining bintree functions, etc.
exec(compile(open('bintree.py').read(), 'bintree.py', 'exec'))

# generate binary tree of parents
btree = bintree(ids)

print("setting up binary tree interconnect:")
print_bintree(btree)
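An illustration of the requirement stated in the docstring (plain Python, not part of bintree.py): a tree reduce combines partial results in a different grouping than a sequential left fold, so the operator must be associative, and commutative if sibling results can arrive in either order.

def tree_reduce(f, items):
    # pairwise-combine neighbours until a single value remains
    while len(items) > 1:
        items = [f(items[i], items[i + 1]) if i + 1 < len(items) else items[i]
                 for i in range(0, len(items), 2)]
    return items[0]

print(tree_reduce(lambda a, b: a + b, [1, 2, 3, 4]))  # 10, same as sum(): '+' is associative
print(tree_reduce(lambda a, b: a - b, [1, 2, 3, 4]))  # (1-2)-(3-4) == 0, but a left fold gives -8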
#-------------------------------------------------------------------------
# Imports
#-------------------------------------------------------------------------
from __future__ import print_function

import time

from IPython.parallel import Client

#-------------------------------------------------------------------------
# Setup
#-------------------------------------------------------------------------

mux = Client()[:]

mux.clear()
mux.block = False

ar1 = mux.apply(time.sleep, 5)
ar2 = mux.push(dict(a=10, b=30, c=range(20000), d='The dog went swimming.'))
ar3 = mux.pull(('a', 'b', 'd'), block=False)

print("Try a non-blocking get_result")
ar4 = mux.get_result()

print("Now wait for all the results")
mux.wait([ar1, ar2, ar3, ar4])
print("The last pull got:", ar4.r)
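A hedged follow-up, not in the original listing: after the wait, the value of the non-blocking pull can be read as well; pulling several names returns one [a, b, d] triple per engine in the view.

print("The pull of (a, b, d) got:", ar3.get())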
# -*- coding: utf-8 -*-
"""
First launch the MPI IPython cluster from the shell:

    ipcluster start --profile=mpi -n 4
"""
import numpy as np
from pprint import pprint

from IPython.parallel import Client

c = Client(profile="mpi")
c.block = True  # computations run synchronously

# A direct view on all engines
view = c[:]

# Run / load the script defining psum() on every engine
view.run("psum.py")

# Scatter 'a' across all engines
view.scatter("a", np.arange(16, dtype="float"))
pprint(view["a"])

# Execute and fetch the result
view.execute("b = psum(a)")
b = view["b"]
pprint(b)
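psum.py itself is not shown here; a minimal sketch of what it presumably defines, assuming the standard mpi4py all-reduce pattern for a global sum, would be:

# psum.py -- sketch only, assuming a global sum via MPI Allreduce
from mpi4py import MPI
import numpy as np

def psum(a):
    """Sum the local block 'a' and combine the partial sums across all ranks."""
    local_sum = np.array(np.sum(a), dtype='d')
    global_sum = np.array(0.0, dtype='d')
    MPI.COMM_WORLD.Allreduce([local_sum, MPI.DOUBLE],
                             [global_sum, MPI.DOUBLE], op=MPI.SUM)
    return global_sum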