def parallel_map(func, *seq, **kwds):
    """the scatter-gather strategy for mpi

    Map ``func`` over the aligned sequences in ``seq`` by splitting the
    index range over all MPI ranks, computing each chunk locally, then
    gathering the partial results back on the master.

    Args:
        func: function applied elementwise across the sequences in ``seq``.
        *seq: one or more equal-length sequences of input arguments.
        onall (bool, keyword): if False, the master does no work itself
            (requires at least one worker node). Default is True.

    Returns:
        list of results, in input order (populated on the master rank).

    NOTE(review): relies on module-level MPI state (``comm``, ``rank``,
    ``size``, ``master``, ``any_tag``, ``any_source``) and helpers
    (``__index``, ``lookup``, ``get_workload``, ``__SKIP``) defined
    elsewhere in this module.
    """
    # onall=False means the master is skipped when dividing the workload
    skip = not bool(kwds.get('onall', True))
    if skip is False:
        skip = None
    else:
        if size == 1: # was 'size is 1': identity check on an int is fragile
            raise ValueError("There must be at least one worker node")
        skip = master
    __SKIP[0] = skip
    NJOBS = len(seq[0])
   #queue = __queue(*seq) #XXX: passing the *data*
    queue = __index(*seq) #XXX: passing the *index*
    results = [''] * NJOBS
    if rank == master:
        # each processor needs to do its set of jobs
        message = next(queue) # was queue.next(): py2-only iterator protocol
        # send jobs to workers
        for worker in range(1, size):
            # master sending seq[ib:ie] to worker 'worker'
            comm.send(next(queue), worker, 0)
    else:
        # worker 'rank' receiving job
        status = mpi.Status()
        message = comm.recv(source=master, tag=any_tag, status=status)
        # message received; no need to parse tags
    # now message is the part of seq that each worker has to do
   #result = map(func, *message) #XXX: receiving the *data*
    result = list(map(func, *lookup(seq, *message))) #XXX: receives an *index*
    if rank == master:
        _b, _e = get_workload(rank, size, NJOBS, skip=skip)
       #_b, _e = balance_workload(size, NJOBS, rank, skip=skip)
        results[_b:_e] = result[:]
    # at this point, all nodes must send to master
    if rank != master:
        # worker 'rank' sending answer to master
        comm.send(result, master, rank)
    else:
        # master needs to receive once for each worker
        for worker in range(1, size):
            # master listening for worker
            status = mpi.Status()
            message = comm.recv(source=any_source, tag=any_tag, status=status)
            sender = status.source
           #anstag = status.tag
            # master received answer from worker 'sender'
            ib, ie = get_workload(sender, size, NJOBS, skip=skip)
           #ib, ie = balance_workload(size, NJOBS, sender, skip=skip)
            results[ib:ie] = message
            # master received results[ib:ie] from worker 'sender'
   #comm.barrier()
    return results
def parallel_map(func, *seq, **kwds):
    """the scatter-gather strategy for mpi

    Map ``func`` over the aligned sequences in ``seq`` by splitting the
    index range over all MPI ranks, computing each chunk locally, then
    gathering the partial results back on the master.

    Args:
        func: function applied elementwise across the sequences in ``seq``.
        *seq: one or more equal-length sequences of input arguments.
        onall (bool, keyword): if False, the master does no work itself
            (requires at least one worker node). Default is True.

    Returns:
        list of results, in input order (populated on the master rank).

    NOTE(review): relies on module-level MPI state (``comm``, ``rank``,
    ``size``, ``master``, ``any_tag``, ``any_source``) and helpers
    (``__index``, ``lookup``, ``get_workload``, ``__SKIP``) defined
    elsewhere in this module.
    """
    # onall=False means the master is skipped when dividing the workload
    skip = not bool(kwds.get('onall', True))
    if skip is False:
        skip = None
    else:
        if size == 1: # was 'size is 1': identity check on an int is fragile
            raise ValueError("There must be at least one worker node")
        skip = master
    __SKIP[0] = skip
    NJOBS = len(seq[0])
   #queue = __queue(*seq) #XXX: passing the *data*
    queue = __index(*seq) #XXX: passing the *index*
    results = [''] * NJOBS
    if rank == master:
        # each processor needs to do its set of jobs
        message = next(queue)
        # send jobs to workers
        for worker in range(1, size):
            # master sending seq[ib:ie] to worker 'worker'
            comm.send(next(queue), worker, 0)
    else:
        # worker 'rank' receiving job
        status = mpi.Status()
        message = comm.recv(source=master, tag=any_tag, status=status)
        # message received; no need to parse tags
    # now message is the part of seq that each worker has to do
   #result = map(func, *message) #XXX: receiving the *data*
    result = list(map(func, *lookup(seq, *message))) #XXX: receives an *index*
    if rank == master:
        _b, _e = get_workload(rank, size, NJOBS, skip=skip)
       #_b, _e = balance_workload(size, NJOBS, rank, skip=skip)
        results[_b:_e] = result[:]
    # at this point, all nodes must send to master
    if rank != master:
        # worker 'rank' sending answer to master
        comm.send(result, master, rank)
    else:
        # master needs to receive once for each worker
        for worker in range(1, size):
            # master listening for worker
            status = mpi.Status()
            message = comm.recv(source=any_source, tag=any_tag, status=status)
            sender = status.source
           #anstag = status.tag
            # master received answer from worker 'sender'
            ib, ie = get_workload(sender, size, NJOBS, skip=skip)
           #ib, ie = balance_workload(size, NJOBS, sender, skip=skip)
            results[ib:ie] = message
            # master received results[ib:ie] from worker 'sender'
   #comm.barrier()
    return results
def __queue(*inputs):
    "iterator that groups inputs by index (i.e. [(x[0], a[0]),(x[1], a[1])])"
   #NJOBS = len(inputs[0])
   #return (lookup(inputs, *get_workload(i, size, NJOBS, skip=__SKIP[0])) for i in range(size))
    load = __index(*inputs)
    # unpack the index pair yielded by __index so the call matches the
    # lookup(inputs, ib, ie) form used by the commented variant above and
    # by lookup(seq, *message) in parallel_map -- the original passed the
    # pair as a single argument (missing '*'); TODO confirm against lookup
    return (lookup(inputs, *next(load)) for i in range(size))
def parallel_map(func, *seq, **kwds):
    """the worker pool strategy for mpi

    Map ``func`` over the aligned sequences in ``seq`` with dynamic load
    balancing: the master hands out one job at a time and sends a new job
    to whichever worker answers first, until all jobs are done.

    Args:
        func: function applied elementwise across the sequences in ``seq``.
        *seq: one or more equal-length sequences of input arguments.
        onall (bool, keyword): if False, the master does no work itself
            (requires at least one worker node). Default is True.

    Returns:
        list of results, in input order (populated on the master rank).

    NOTE(review): relies on module-level MPI state (``comm``, ``rank``,
    ``size``, ``master``, ``any_tag``, ``any_source``, ``EXITTAG``) and
    helpers (``__index``, ``lookup``, ``MPool``, ``log``, ``__SKIP``)
    defined elsewhere in this module.
    """
    skip = not bool(kwds.get('onall', True))
    __SKIP[0] = skip
    NJOBS = len(seq[0])
    nodes = size if size <= NJOBS+skip else NJOBS+skip # nodes <= NJOBS+(master)
   #queue = __queue(*seq) #XXX: passing the *data*
    queue = __index(*seq) #XXX: passing the *index*
    results = [''] * NJOBS
    if rank == master:
        log.info("size: %s, NJOBS: %s, nodes: %s, skip: %s" % (size, NJOBS, nodes, skip))
        if nodes == 1: # the pool is just the master
            if skip:
                raise ValueError("There must be at least one worker node")
            # list(...) keeps the return type a list on py3 as well as py2
            return list(map(func, *seq))
        # spawn a separate process for jobs running on the master
        if not skip:
            pool = MPool(1) #XXX: poor pickling... use iSend/iRecv instead?
           #job = next(queue) #XXX: receiving the *data*
            job = lookup(seq, next(queue)) #XXX: receives an *index*
            log.info("MASTER SEND'ING(0)")
            mresult, mjobid = pool.apply_async(func, args=job), 0
        # farm out to workers: 1-N for indexing, 0 reserved for termination
        for worker in range(1, nodes): #XXX: don't run on master...
            # master send next job to worker 'worker' with tag='worker'
            log.info("WORKER SEND'ING(%s)" % (worker-skip,))
            comm.send(next(queue), worker, worker) # was queue.next(): py2-only
        # start receiving
        recvjob = 0; donejob = 0
        sendjob = nodes
        while recvjob < NJOBS: # was: for job in range(NJOBS)
            log.info("--job(%s,%s)--" % (sendjob-skip, recvjob))
            if recvjob < NJOBS and donejob < nodes-1:
                status = mpi.Status()
                # master receive jobs from any_source and any_tag
                log.info("RECV'ING FROM WORKER")
                message = comm.recv(source=any_source,tag=any_tag,status=status)
                sender = status.source
                anstag = status.tag
                if anstag: recvjob += 1 # don't count a 'donejob'
                results[anstag-skip] = message # store the received message
                log.info("WORKER(%s): %s" % (anstag-skip, message))
                if (sendjob-skip < NJOBS): # then workers are not done
                    # master send next job to worker 'sender' with tag='jobid'
                    log.info("WORKER SEND'ING(%s)" % (sendjob-skip))
                    job = next(queue)
                    comm.send(job, sender, sendjob)
                    sendjob += 1
                else: # workers are done
                    # send the "exit" signal
                    log.info("WORKER SEND'ING(DONE)")
                    comm.send("done", sender, EXITTAG)
                    donejob += 1
                log.info("WORKER LOOP DONE")
            # check if the master is done
            log.info("--job(%s,%s)--" % (sendjob-skip, recvjob))
            if not skip and mresult.ready():
                log.info("RECV'ING FROM MASTER")
                results[mjobid] = mresult.get()
                log.info("MASTER(%s): %s" % (mjobid, results[mjobid]))
                recvjob += 1
                if (sendjob < NJOBS):
                    log.info("MASTER SEND'ING(%s)" % sendjob)
                   #job = next(queue) #XXX: receiving the *data*
                    job = lookup(seq, next(queue)) #XXX: receives an *index*
                    mresult, mjobid = pool.apply_async(func, args=job), sendjob
                    sendjob += 1
                else:
                    # no more jobs for the master: disable further ready checks
                    mresult.ready = lambda : False
            log.info("MASTER LOOP DONE")
        log.info("WE ARE EXITING")
        if not skip:
            pool.close()
            pool.join()
    elif (nodes != size) and (rank >= nodes): # then skip this node...
        pass
    else: # then this is a worker node
        while True:
            # receive jobs from master @ any_tag
            status = mpi.Status()
            message = comm.recv(source=master, tag=any_tag, status=status)
            tag = status.tag
            if tag == EXITTAG: # worker is done
                break
            # worker evaluates received job
           #result = func(*message) #XXX: receiving the *data*
            result = func(*lookup(seq, message)) #XXX: receives an *index*
            # send result back to master
            comm.send(result, master, tag) #XXX: or write to results then merge?
    comm.barrier()
    return results
def parallel_map(func, *seq, **kwds):
    """the worker pool strategy for mpi

    Map ``func`` over the aligned sequences in ``seq`` with dynamic load
    balancing: the master hands out one job at a time and sends a new job
    to whichever worker answers first, until all jobs are done.

    Args:
        func: function applied elementwise across the sequences in ``seq``.
        *seq: one or more equal-length sequences of input arguments.
        onall (bool, keyword): if False, the master does no work itself
            (requires at least one worker node). Default is True.

    Returns:
        list of results, in input order (populated on the master rank).

    NOTE(review): relies on module-level MPI state (``comm``, ``rank``,
    ``size``, ``master``, ``any_tag``, ``any_source``, ``EXITTAG``) and
    helpers (``__index``, ``lookup``, ``MPool``, ``log``, ``__SKIP``)
    defined elsewhere in this module.
    """
    skip = not bool(kwds.get('onall', True))
    __SKIP[0] = skip
    NJOBS = len(seq[0])
    nodes = size if size <= NJOBS+skip else NJOBS+skip # nodes <= NJOBS+(master)
   #queue = __queue(*seq) #XXX: passing the *data*
    queue = __index(*seq) #XXX: passing the *index*
    results = [''] * NJOBS
    if rank == master:
        log.info("size: %s, NJOBS: %s, nodes: %s, skip: %s" % (size, NJOBS, nodes, skip))
        if nodes == 1: # the pool is just the master
            if skip:
                raise ValueError("There must be at least one worker node")
            # was 'return map(...)': a py3 map object, inconsistent with the
            # list returned by every other path
            return list(map(func, *seq))
        # spawn a separate process for jobs running on the master
        if not skip:
            pool = MPool(1) #XXX: poor pickling... use iSend/iRecv instead?
           #job = next(queue) #XXX: receiving the *data*
            job = lookup(seq, next(queue)) #XXX: receives an *index*
            log.info("MASTER SEND'ING(0)")
            mresult, mjobid = pool.apply_async(func, args=job), 0
        # farm out to workers: 1-N for indexing, 0 reserved for termination
        for worker in range(1, nodes): #XXX: don't run on master...
            # master send next job to worker 'worker' with tag='worker'
            log.info("WORKER SEND'ING(%s)" % (worker-skip,))
            comm.send(next(queue), worker, worker)
        # start receiving
        recvjob = 0; donejob = 0
        sendjob = nodes
        while recvjob < NJOBS: # was: for job in range(NJOBS)
            log.info("--job(%s,%s)--" % (sendjob-skip, recvjob))
            if recvjob < NJOBS and donejob < nodes-1:
                status = mpi.Status()
                # master receive jobs from any_source and any_tag
                log.info("RECV'ING FROM WORKER")
                message = comm.recv(source=any_source,tag=any_tag,status=status)
                sender = status.source
                anstag = status.tag
                if anstag: recvjob += 1 # don't count a 'donejob'
                results[anstag-skip] = message # store the received message
                log.info("WORKER(%s): %s" % (anstag-skip, message))
                if (sendjob-skip < NJOBS): # then workers are not done
                    # master send next job to worker 'sender' with tag='jobid'
                    log.info("WORKER SEND'ING(%s)" % (sendjob-skip))
                    job = next(queue)
                    comm.send(job, sender, sendjob)
                    sendjob += 1
                else: # workers are done
                    # send the "exit" signal
                    log.info("WORKER SEND'ING(DONE)")
                    comm.send("done", sender, EXITTAG)
                    donejob += 1
                log.info("WORKER LOOP DONE")
            # check if the master is done
            log.info("--job(%s,%s)--" % (sendjob-skip, recvjob))
            if not skip and mresult.ready():
                log.info("RECV'ING FROM MASTER")
                results[mjobid] = mresult.get()
                log.info("MASTER(%s): %s" % (mjobid, results[mjobid]))
                recvjob += 1
                if (sendjob < NJOBS):
                    log.info("MASTER SEND'ING(%s)" % sendjob)
                   #job = next(queue) #XXX: receiving the *data*
                    job = lookup(seq, next(queue)) #XXX: receives an *index*
                    mresult, mjobid = pool.apply_async(func, args=job), sendjob
                    sendjob += 1
                else:
                    # no more jobs for the master: disable further ready checks
                    mresult.ready = lambda : False
            log.info("MASTER LOOP DONE")
        log.info("WE ARE EXITING")
        if not skip:
            pool.close()
            pool.join()
    elif (nodes != size) and (rank >= nodes): # then skip this node...
        pass
    else: # then this is a worker node
        while True:
            # receive jobs from master @ any_tag
            status = mpi.Status()
            message = comm.recv(source=master, tag=any_tag, status=status)
            tag = status.tag
            if tag == EXITTAG: # worker is done
                break
            # worker evaluates received job
           #result = func(*message) #XXX: receiving the *data*
            result = func(*lookup(seq, message)) #XXX: receives an *index*
            # send result back to master
            comm.send(result, master, tag) #XXX: or write to results then merge?
    comm.barrier()
    return results