def _debug(boolean):
    """if True, print debugging info and save temporary files after pickling"""
    if boolean:
        log.setLevel(logging.DEBUG)
        _save(True)
    else:
        log.setLevel(logging.WARN)
        _save(False)
    return

_pid = '.' + str(os.getpid()) + '.'
defaults = {
    'nodes': str(cpu_count()),
    'program': which_strategy(lazy=True) or 'ezscatter', # serialize to tempfile
    'mpirun': which_mpirun() or 'mpiexec',
    'python': which_python(lazy=True) or 'python',
    'progargs': '',
    'outfile': 'results%sout' % _pid,
    'errfile': 'errors%sout' % _pid,
    'jobfile': 'job%sid' % _pid,
    'scheduler': '',
    'timelimit': '00:02',
    'queue': 'normal',
    'workdir': '.'
}
#FIXME: __init__ and self for 'nodes' vs 'ncpus' is confused; see __repr__
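# Example (a hedged sketch, not part of the module): assuming this module is
# importable as 'pyina.launchers', debug mode and the launch defaults can be
# toggled before any map is run:
#
#     import pyina.launchers as launchers
#
#     launchers._debug(True)             # DEBUG logging; keep pickled tempfiles
#     launchers.defaults['nodes'] = '4'  # override the cpu_count() default
#     launchers._debug(False)            # back to WARN; tempfiles cleaned up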
def map(self, func, *args, **kwds):
    """
    The function 'func', its arguments, and the results of the map are all
    stored and shipped across communicators as pickled strings.

    Optional Keyword Arguments:
     - onall = if True, include master as a worker [default: True]

    NOTE: 'onall' defaults to True for both the scatter-gather and the
    worker pool strategies. A worker pool with onall=True may have added
    difficulty in pickling functions, due to asynchronous message passing
    with itself.

    Additional keyword arguments are passed to 'func' along with 'args'.
    """
    # set strategy
    if self.scatter:
        kwds['onall'] = kwds.get('onall', True)
    else:
        kwds['onall'] = kwds.get('onall', True) #XXX: has pickling issues
    config = {}
    config['program'] = which_strategy(self.scatter, lazy=True)

    # serialize function and arguments to files
    modfile = self._modularize(func)
    argfile = self._pickleargs(args, kwds)
    # keep the above handles as long as you want the tempfiles to exist
    if _SAVE[0]:
        _HOLD.append(modfile)
        _HOLD.append(argfile)
    # create an empty results file
    resfilename = tempfile.mktemp(dir=self.workdir)
    # process the module name
    modname = self._modulenamemangle(modfile.name)
    # build the launcher's argument string
    config['progargs'] = ' '.join([modname, argfile.name, \
                                   resfilename, self.workdir])
    #XXX: better with or w/o scheduler baked into command ?
    #XXX: better... if self.scheduler: self.scheduler.submit(command) ?
    #XXX: better if self.__launch modifies command to include scheduler ?
    if _SAVE[0]:
        self._save_in(modfile.name, argfile.name) # func, pickled input
    # create any necessary job files
    if self.scheduler:
        config.update(self.scheduler._prepare())
    ######################################################################
    # build the launcher command
    command = self._launcher(config)
    log.info('(skipping): %s' % command)
    if log.level == logging.DEBUG:
        error = False
        res = []
    else:
        try:
            subproc = self.__launch(command) # submit the jobs
            #pid = subproc.pid # get process id
            error = subproc.wait() # block until all done
            ## just to be sure... here's a loop to wait for results file ##
            maxcount = self.timeout
            counter = 0
            from time import sleep
            while not os.path.exists(resfilename):
                call('sync', shell=True)
                sleep(1)
                counter += 1
                if counter >= maxcount:
                    print("Warning: exceeded timeout (%s s)" % maxcount)
                    break
            # read result back
            res = dill.load(open(resfilename, 'rb'))
        except:
            error = True
    ######################################################################
    # cleanup files
    if _SAVE[0] and log.level == logging.WARN:
        self._save_out(resfilename) # pickled output
    self._cleanup(resfilename, modfile.name, argfile.name)
    if self.scheduler and not _SAVE[0]:
        self.scheduler._cleanup()
    if error:
        raise IOError("launch failed: %s" % command)
    return res
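# Example usage (a minimal sketch, under the assumption that this 'map' is a
# method of a launcher class such as pyina's 'Mpi'; the 4-worker pool and the
# builtin 'pow' are illustrative only):
#
#     from pyina.launchers import Mpi
#
#     pool = Mpi(4)                              # 4 MPI workers
#     res = pool.map(pow, [1, 2, 3], [4, 5, 6])  # -> [1, 32, 729]
#
# With onall=False the master rank only coordinates, which can sidestep the
# self-messaging pickling issue noted in the docstring:
#
#     res = pool.map(pow, [1, 2, 3], [4, 5, 6], onall=False)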
def ez_map(func, *arglist, **kwds):
    """higher-level map interface for selected mapper and launcher

    maps function 'func' across arguments 'arglist'. arguments and results
    are stored and sent as pickled strings, while function 'func' is
    inspected and written as a source file to be imported.

    Further Input:
        nodes -- the number of parallel nodes
        launcher -- the launcher object
        scheduler -- the scheduler object
        mapper -- the mapper object
        timelimit -- string representation of maximum run time (e.g. '00:02')
        queue -- string name of selected queue (e.g. 'normal')
    """
    import dill as pickle
    import os.path, tempfile, subprocess
    from pyina.tools import which_strategy
    # mapper = None (allow for use of default mapper)
    if 'mapper' in kwds:
        mapper = kwds['mapper']
        if mapper() == "mpi_pool": scatter = False
        elif mapper() == "mpi_scatter": scatter = True
        else: raise NotImplementedError("Mapper '%s' not found." % mapper())
        ezdefaults['program'] = which_strategy(scatter, lazy=True)
    # override the defaults
    if 'nnodes' in kwds: ezdefaults['nodes'] = kwds['nnodes']
    if 'nodes' in kwds: ezdefaults['nodes'] = kwds['nodes']
    if 'timelimit' in kwds: ezdefaults['timelimit'] = kwds['timelimit']
    if 'queue' in kwds: ezdefaults['queue'] = kwds['queue']
    # set the scheduler & launcher (or use the given default)
    if 'launcher' in kwds: launcher = kwds['launcher']
    else: launcher = mpirun_launcher  #XXX: default = non_mpi?
    if 'scheduler' in kwds: scheduler = kwds['scheduler']
    else: scheduler = ''
    # set scratch directory (most often required for queue launcher)
    if 'workdir' in kwds: ezdefaults['workdir'] = kwds['workdir']
    else:
        if launcher in [torque_launcher, moab_launcher] \
        or scheduler in [torque_scheduler, moab_scheduler]:
            ezdefaults['workdir'] = os.path.expanduser("~")

    from dill.temp import dump, dump_source
    # write func source to a NamedTemporaryFile (instead of pickle.dump)
    # ezrun requires 'FUNC = <function>' to be included as module.FUNC
    modfile = dump_source(func, alias='FUNC', dir=ezdefaults['workdir'])
    # standard pickle.dump of inputs to a NamedTemporaryFile
    kwd = {'onall': kwds.get('onall', True)}
    argfile = dump((arglist, kwd), suffix='.arg', dir=ezdefaults['workdir'])
    # keep the above return values as long as you want the tempfiles to exist
    resfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
    modname = os.path.splitext(os.path.basename(modfile.name))[0]
    ezdefaults['progargs'] = ' '.join([modname, argfile.name, resfilename, \
                                       ezdefaults['workdir']])
    #HOLD.append(modfile)
    #HOLD.append(argfile)
    if launcher in [torque_launcher, moab_launcher] \
    or scheduler in [torque_scheduler, moab_scheduler]:
        jobfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        outfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        errfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        ezdefaults['jobfile'] = jobfilename
        ezdefaults['outfile'] = outfilename
        ezdefaults['errfile'] = errfilename
    # get the appropriate launcher for the scheduler
    if scheduler in [torque_scheduler] and launcher in [mpirun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().mpirun
    elif scheduler in [moab_scheduler] and launcher in [mpirun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().mpirun
    elif scheduler in [torque_scheduler] and launcher in [srun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().srun
    elif scheduler in [moab_scheduler] and launcher in [srun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().srun
    elif scheduler in [torque_scheduler] and launcher in [aprun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().aprun
    elif scheduler in [moab_scheduler] and launcher in [aprun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().aprun
    elif scheduler in [torque_scheduler] and launcher in [serial_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().serial
    elif scheduler in [moab_scheduler] and launcher in [serial_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().serial
    #else: scheduler = None
    # counting on the function below to block until done.
    #print('executing: ', launcher(ezdefaults))
    launch(launcher(ezdefaults)) #FIXME: use subprocessing
    if launcher in [torque_launcher, moab_launcher] \
    or scheduler in [torque_scheduler, moab_scheduler]:
        import time                             #BLOCKING
        while not os.path.exists(resfilename):  #XXX: or out* to confirm start
            time.sleep(sleeptime) #XXX: wait for results... may infinite loop?
        subprocess.call('rm -f %s' % jobfilename, shell=True)
        subprocess.call('rm -f %s' % outfilename, shell=True)
        subprocess.call('rm -f %s' % errfilename, shell=True)
    # debuggery... output = function(inputs)
    #subprocess.call('cp -f %s modfile.py' % modfile.name, shell=True) # getsource; FUNC=func
    #subprocess.call('cp -f %s argfile.py' % argfile.name, shell=True) # pickled list of inputs
    #subprocess.call('cp -f %s resfile.py' % resfilename, shell=True)  # pickled list of output
    # read result back
    res = pickle.load(open(resfilename, 'rb'))
    subprocess.call('rm -f %s' % resfilename, shell=True)
    subprocess.call('rm -f %sc' % modfile.name, shell=True)
    return res
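# Example usage (a hedged sketch; 'squared' is a hypothetical user function,
# and an MPI stack is assumed so the default mpirun_launcher can run):
#
#     def squared(x):
#         return x**2
#
#     results = ez_map(squared, range(10), nodes=4)
#     # results == [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
#
# Since ez_map writes 'func' out as importable source, it is best suited to
# module-level functions; see ez_map2 below for pickled callables.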
def ez_map2(func, *arglist, **kwds):
    """higher-level map interface for selected mapper and launcher

    maps function 'func' across arguments 'arglist'. arguments and results
    are stored and sent as pickled strings, and the function 'func' is also
    stored and sent as a pickled string. This differs from 'ez_map' in that
    it does not use a temporary source file to store the mapped function.

    Further Input:
        nodes -- the number of parallel nodes
        launcher -- the launcher object
        scheduler -- the scheduler object
        mapper -- the mapper object
        timelimit -- string representation of maximum run time (e.g. '00:02')
        queue -- string name of selected queue (e.g. 'normal')
    """
    import dill as pickle
    import os.path, tempfile, subprocess
    from pyina.tools import which_strategy
    # mapper = None (allow for use of default mapper)
    if 'mapper' in kwds:
        mapper = kwds['mapper']
        if mapper() == "mpi_pool": scatter = False
        elif mapper() == "mpi_scatter": scatter = True
        else: raise NotImplementedError("Mapper '%s' not found." % mapper())
        ezdefaults['program'] = which_strategy(scatter, lazy=True)
    # override the defaults
    if 'nnodes' in kwds: ezdefaults['nodes'] = kwds['nnodes']
    if 'nodes' in kwds: ezdefaults['nodes'] = kwds['nodes']
    if 'timelimit' in kwds: ezdefaults['timelimit'] = kwds['timelimit']
    if 'queue' in kwds: ezdefaults['queue'] = kwds['queue']
    # set the scheduler & launcher (or use the given default)
    if 'launcher' in kwds: launcher = kwds['launcher']
    else: launcher = mpirun_launcher  #XXX: default = non_mpi?
    if 'scheduler' in kwds: scheduler = kwds['scheduler']
    else: scheduler = ''
    # set scratch directory (most often required for queue launcher)
    if 'workdir' in kwds: ezdefaults['workdir'] = kwds['workdir']
    else:
        if launcher in [torque_launcher, moab_launcher] \
        or scheduler in [torque_scheduler, moab_scheduler]:
            ezdefaults['workdir'] = os.path.expanduser("~")

    from dill.temp import dump
    # standard pickle.dump of func and inputs to NamedTemporaryFiles
    modfile = dump(func, suffix='.pik', dir=ezdefaults['workdir'])
    kwd = {'onall': kwds.get('onall', True)}
    argfile = dump((arglist, kwd), suffix='.arg', dir=ezdefaults['workdir'])
    # keep the above return values as long as you want the tempfiles to exist
    resfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
    ezdefaults['progargs'] = ' '.join([modfile.name, argfile.name, resfilename, \
                                       ezdefaults['workdir']])
    #HOLD.append(modfile)
    #HOLD.append(argfile)
    if launcher in [torque_launcher, moab_launcher] \
    or scheduler in [torque_scheduler, moab_scheduler]:
        jobfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        outfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        errfilename = tempfile.mktemp(dir=ezdefaults['workdir'])
        ezdefaults['jobfile'] = jobfilename
        ezdefaults['outfile'] = outfilename
        ezdefaults['errfile'] = errfilename
    # get the appropriate launcher for the scheduler
    if scheduler in [torque_scheduler] and launcher in [mpirun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().mpirun
    elif scheduler in [moab_scheduler] and launcher in [mpirun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().mpirun
    elif scheduler in [torque_scheduler] and launcher in [srun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().srun
    elif scheduler in [moab_scheduler] and launcher in [srun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().srun
    elif scheduler in [torque_scheduler] and launcher in [aprun_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().aprun
    elif scheduler in [moab_scheduler] and launcher in [aprun_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().aprun
    elif scheduler in [torque_scheduler] and launcher in [serial_launcher]:
        launcher = torque_launcher
        ezdefaults['scheduler'] = scheduler().serial
    elif scheduler in [moab_scheduler] and launcher in [serial_launcher]:
        launcher = moab_launcher
        ezdefaults['scheduler'] = scheduler().serial
    #else: scheduler = None
    # counting on the function below to block until done.
    #print('executing: ', launcher(ezdefaults))
    launch(launcher(ezdefaults)) #FIXME: use subprocessing
    if launcher in [torque_launcher, moab_launcher] \
    or scheduler in [torque_scheduler, moab_scheduler]:
        import time                             #BLOCKING
        while not os.path.exists(resfilename):  #XXX: or out* to confirm start
            time.sleep(sleeptime) #XXX: wait for results... may infinite loop?
        subprocess.call('rm -f %s' % jobfilename, shell=True)
        subprocess.call('rm -f %s' % outfilename, shell=True)
        subprocess.call('rm -f %s' % errfilename, shell=True)
    # read result back
    res = pickle.load(open(resfilename, 'rb'))
    subprocess.call('rm -f %s' % resfilename, shell=True)
    return res
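# Example usage (a hedged sketch): because ez_map2 ships 'func' as a dill
# pickle rather than dumped source, it can also map callables that are
# awkward to write to a source file, such as a lambda:
#
#     results = ez_map2(lambda x: x + 1, range(4), nodes=2)
#     # results == [1, 2, 3, 4]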