def example_2():
    info('main line')
    pool_size = multiprocessing.cpu_count()
    jobs = [multiprocessing.Process(target=greet, args=(n,))
            for n in ('bob', 'jane')]
    mputil.launch_and_wait(jobs, pool_size)
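# A minimal sketch of the helpers example_2() assumes are in scope. The
# bodies below are hypothetical (modeled on the stdlib multiprocessing docs
# example); only the names and call signatures come from the snippet above.
import multiprocessing
import os

from stsci.tools import mputil  # assumed provider of launch_and_wait


def info(title):
    # hypothetical diagnostic helper
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())


def greet(name):
    # hypothetical worker; runs in a child process
    info('function greet')
    print('hello', name)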
def example_3():
    pool_size = multiprocessing.cpu_count()
    mgr = multiprocessing.Manager()
    d = mgr.dict()
    jobs = [multiprocessing.Process(target=fill_arr, args=(d, i, i), name=str(i))
            for i in range(10)]
    mputil.launch_and_wait(jobs, pool_size)
    a = np.zeros((20, 10))
    for key, val in d.items():
        a[:, key] = val
    print(a)
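# A hypothetical fill_arr worker consistent with how example_3() uses it: the
# parent copies d[key] into column `key` of a (20, 10) array, so each child
# must publish a length-20 column into the shared Manager dict.
import numpy as np


def fill_arr(d, key, val):
    # runs in a child process; writing through the Manager proxy makes the
    # result visible to the parent after the workers are joined
    d[key] = np.full(20, float(val))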
def rundrizCR(imgObjList, configObj, procSteps=None):
    if procSteps is not None:
        procSteps.addStep('Driz_CR')

    step_name = util.getSectionName(configObj, _STEP_NUM)
    if not configObj[step_name]['driz_cr']:
        log.info('Cosmic-ray identification (driz_cr) step not performed.')
        return

    paramDict = configObj[step_name]
    paramDict['crbit'] = configObj['crbit']
    paramDict['inmemory'] = imgObjList[0].inmemory

    log.info("USER INPUT PARAMETERS for Driz_CR Step:")
    util.printParams(paramDict, log=log)

    # if we have the cpus and s/w, ok, but still allow user to set pool size
    pool_size = util.get_pool_size(configObj.get('num_cores'), len(imgObjList))
    if imgObjList[0].inmemory:
        pool_size = 1  # reason why is output in drizzle step

    subprocs = []
    if pool_size > 1:
        log.info('Executing {:d} parallel workers'.format(pool_size))
        mp_ctx = multiprocessing.get_context('fork')

        for image in imgObjList:
            manager = mp_ctx.Manager()
            mgr = manager.dict({})

            p = mp_ctx.Process(
                target=_driz_cr,
                name='drizCR._driz_cr()',  # for err msgs
                args=(image, mgr, paramDict.dict())
            )
            subprocs.append(p)
            image.virtualOutputs.update(mgr)

        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done
    else:
        log.info('Executing serially')
        for image in imgObjList:
            _driz_cr(image, image.virtualOutputs, paramDict)

    if procSteps is not None:
        procSteps.endStep('Driz_CR')
def rundrizCR(imgObjList, configObj, procSteps=None):
    if procSteps is not None:
        procSteps.addStep('Driz_CR')

    step_name = util.getSectionName(configObj, _STEP_NUM)
    if not configObj[step_name]['driz_cr']:
        log.info('Cosmic-ray identification (driz_cr) step not performed.')
        return

    paramDict = configObj[step_name]
    paramDict['crbit'] = configObj['crbit']
    paramDict['inmemory'] = imgObjList[0].inmemory

    log.info("USER INPUT PARAMETERS for Driz_CR Step:")
    util.printParams(paramDict, log=log)

    # if we have the cpus and s/w, ok, but still allow user to set pool size
    pool_size = util.get_pool_size(configObj.get('num_cores'), len(imgObjList))
    if imgObjList[0].inmemory:
        pool_size = 1  # reason why is output in drizzle step

    subprocs = []
    if pool_size > 1:
        log.info('Executing {:d} parallel workers'.format(pool_size))

        for image in imgObjList:
            manager = multiprocessing.Manager()
            mgr = manager.dict({})

            p = multiprocessing.Process(
                target=_driz_cr,
                name='drizCR._driz_cr()',  # for err msgs
                args=(image, mgr, paramDict.dict())
            )
            subprocs.append(p)
            image.virtualOutputs.update(mgr)

        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done
    else:
        log.info('Executing serially')
        for image in imgObjList:
            _driz_cr(image, image.virtualOutputs, paramDict)

    if procSteps is not None:
        procSteps.endStep('Driz_CR')
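# Both versions of rundrizCR() above funnel their prepared Process objects
# into mputil.launch_and_wait(). The sketch below is an assumption about the
# contract implied by how it is called (unstarted Process objects in, blocks
# till all done), not a copy of the stsci.tools implementation.
import time


def launch_and_wait(subprocs, pool_size):
    pending = list(subprocs)  # Process objects that have not been started yet
    running = []
    while pending or running:
        running = [p for p in running if p.is_alive()]
        # keep at most pool_size workers alive at any one time
        while pending and len(running) < pool_size:
            proc = pending.pop(0)
            proc.start()
            running.append(proc)
        time.sleep(0.05)  # avoid busy-waiting while children work
    for proc in subprocs:
        proc.join()  # reap every child; all have exited by this point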
def _process_input_wcs(infiles, wcskey, updatewcs):
    """
    This is a subset of process_input(), for internal use only.  This is the
    portion of input handling which sets/updates WCS data, and is a
    performance hit - a target for parallelization. Returns the expanded list
    of filenames.
    """
    # Run parseinput though it's likely already been done in processFilenames
    outfiles = parseinput.parseinput(infiles)[0]

    # Disable parallel processing here for now until hardware I/O gets "wider".
    # Since this part is IO bound, parallelizing doesn't help more than a little
    # in most cases, and may actually slow this down on some desktop nodes.
    # cfgval_num_cores = None  # get this from paramDict
    # pool_size = util.get_pool_size(cfgval_num_cores, len(outfiles))
    pool_size = 1

    # do the WCS updating
    if wcskey in ['', ' ', 'INDEF', None]:
        if updatewcs:
            log.info('Updating input WCS using "updatewcs"')
    else:
        log.info('Resetting input WCS to be based on WCS key = %s' % wcskey)

    if pool_size > 1:
        log.info('Executing %d parallel workers' % pool_size)
        subprocs = []
        mp_ctx = multiprocessing.get_context('fork')

        for fname in outfiles:
            p = mp_ctx.Process(
                target=_process_input_wcs_single,
                name='processInput._process_input_wcs()',  # for err msgs
                args=(fname, wcskey, updatewcs)
            )
            subprocs.append(p)
        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done
    else:
        log.info('Executing serially')
        for fname in outfiles:
            _process_input_wcs_single(fname, wcskey, updatewcs)

    return outfiles
def _process_input_wcs(infiles, wcskey, updatewcs):
    """
    This is a subset of process_input(), for internal use only.  This is the
    portion of input handling which sets/updates WCS data, and is a
    performance hit - a target for parallelization. Returns the expanded list
    of filenames.
    """
    # Run parseinput though it's likely already been done in processFilenames
    outfiles = parseinput.parseinput(infiles)[0]

    # Disable parallel processing here for now until hardware I/O gets "wider".
    # Since this part is IO bound, parallelizing doesn't help more than a little
    # in most cases, and may actually slow this down on some desktop nodes.
    # cfgval_num_cores = None  # get this from paramDict
    # pool_size = util.get_pool_size(cfgval_num_cores, len(outfiles))
    pool_size = 1

    # do the WCS updating
    if wcskey in ['', ' ', 'INDEF', None]:
        if updatewcs:
            log.info('Updating input WCS using "updatewcs"')
    else:
        log.info('Resetting input WCS to be based on WCS key = %s' % wcskey)

    if pool_size > 1:
        log.info('Executing %d parallel workers' % pool_size)
        subprocs = []
        for fname in outfiles:
            p = multiprocessing.Process(
                target=_process_input_wcs_single,
                name='processInput._process_input_wcs()',  # for err msgs
                args=(fname, wcskey, updatewcs)
            )
            subprocs.append(p)
        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done
    else:
        log.info('Executing serially')
        for fname in outfiles:
            _process_input_wcs_single(fname, wcskey, updatewcs)

    return outfiles
def run_driz(imageObjectList, output_wcs, paramDict, single, build, wcsmap=None):
    """ Perform drizzle operation on input to create output.
    The input parameters originally were a list
    of dictionaries, one for each input, that matches the
    primary parameters for an ``IRAF`` `drizzle` task.

    This method would then loop over all the entries in the
    list and run `drizzle` for each entry.

    Parameters required for input in paramDict:
        build,single,units,wt_scl,pixfrac,kernel,fillval,
        rot,scale,xsh,ysh,blotnx,blotny,outnx,outny,data
    """
    # Ensure that input imageObject is a list
    if not isinstance(imageObjectList, list):
        imageObjectList = [imageObjectList]

    # Set up the versions info dictionary for output to PRIMARY header
    # The keys will be used as the name reported in the header, as-is
    _versions = {
        'AstroDrizzle': __version__,
        'PyFITS': util.__fits_version__,
        'Numpy': util.__numpy_version__
    }

    # Set sub-sampling rate for drizzling
    # stepsize = 2.0
    log.info(' **Using sub-sampling value of %s for kernel %s' %
             (paramDict['stepsize'], paramDict['kernel']))

    maskval = interpret_maskval(paramDict)

    outwcs = copy.deepcopy(output_wcs)

    # Check for existence of output file.
    if (not single and build and
            fileutil.findFile(imageObjectList[0].outputNames['outFinal'])):
        log.info('Removing previous output product...')
        os.remove(imageObjectList[0].outputNames['outFinal'])

    # print out parameters being used for drizzling
    log.info("Running Drizzle to create output frame with WCS of: ")
    output_wcs.printwcs()

    # Will we be running in parallel?
    pool_size = util.get_pool_size(paramDict.get('num_cores'),
                                   len(imageObjectList))
    run_parallel = single and pool_size > 1
    if run_parallel:
        log.info(f'Executing {pool_size:d} parallel workers')
    else:
        if single:  # not yet an option for final drizzle, msg would confuse
            log.info('Executing serially')

    # Set parameters for each input and run drizzle on it here.
    #
    # Perform drizzling...

    numctx = 0
    for img in imageObjectList:
        numctx += img._nmembers
    _numctx = {'all': numctx}

    # if single:
    # Determine how many chips make up each single image
    for img in imageObjectList:
        for chip in img.returnAllChips(extname=img.scienceExt):
            plsingle = chip.outputNames['outSingle']
            if plsingle in _numctx:
                _numctx[plsingle] += 1
            else:
                _numctx[plsingle] = 1

    # Compute how many planes will be needed for the context image.
    _nplanes = int((_numctx['all'] - 1) / 32) + 1
    # For single drizzling or when context is turned off,
    # minimize to 1 plane only...
    if single or imageObjectList[0][1].outputNames['outContext'] in [None, '', ' ']:
        _nplanes = 1

    # An image buffer needs to be setup for converting the input
    # arrays (sci and wht) from FITS format to native format
    # with respect to byteorder and byteswapping.
    # This buffer should be reused for each input if possible.
    _outsci = _outwht = _outctx = _hdrlist = None
    if (not single) or \
       (single and (not run_parallel) and (not imageObjectList[0].inmemory)):
        # Note there are four cases/combinations for single drizzle alone here:
        # (not-inmem, serial), (not-inmem, parallel), (inmem, serial), (inmem, parallel)
        _outsci = np.empty(output_wcs.array_shape, dtype=np.float32)
        _outsci.fill(maskval)
        _outwht = np.zeros(output_wcs.array_shape, dtype=np.float32)
        # initialize context to 3-D array but only pass appropriate plane to drizzle as needed
        _outctx = np.zeros((_nplanes,) + output_wcs.array_shape, dtype=np.int32)
        _hdrlist = []

    # Keep track of how many chips have been processed
    # For single case, this will determine when to close
    # one product and open the next.
    _chipIdx = 0

    # Remember the name of the 1st image that goes into this particular product
    # Ensure that the header reports the proper values for the start of the
    # exposure time used to make this; in particular, TIME-OBS and DATE-OBS.
    template = None

    #
    # Work on each image
    #
    subprocs = []
    for img in imageObjectList:

        chiplist = img.returnAllChips(extname=img.scienceExt)

        # How many inputs should go into this product?
        num_in_prod = _numctx['all']
        if single:
            num_in_prod = _numctx[chiplist[0].outputNames['outSingle']]

        # The name of the 1st image
        fnames = []
        for chip in chiplist:
            fnames.append(chip.outputNames['data'])

        if _chipIdx == 0:
            template = fnames
        else:
            template.extend(fnames)

        # Work each image, possibly in parallel
        if run_parallel:
            # use multiprocessing.Manager only if in parallel and in memory
            mp_ctx = multiprocessing.get_context('fork')

            if img.inmemory:
                manager = mp_ctx.Manager()
                dproxy = manager.dict(img.virtualOutputs)  # copy & wrap it in proxy
                img.virtualOutputs = dproxy

            # parallelize run_driz_img (currently for separate drizzle only)
            p = mp_ctx.Process(
                target=run_driz_img,
                name='adrizzle.run_driz_img()',  # for err msgs
                args=(img, chiplist, output_wcs, outwcs, template, paramDict,
                      single, num_in_prod, build, _versions, _numctx, _nplanes,
                      _chipIdx, None, None, None, None, wcsmap))
            subprocs.append(p)
        else:
            # serial run_driz_img run (either separate drizzle or final drizzle)
            run_driz_img(img, chiplist, output_wcs, outwcs, template,
                         paramDict, single, num_in_prod, build, _versions,
                         _numctx, _nplanes, _chipIdx, _outsci, _outwht,
                         _outctx, _hdrlist, wcsmap)

        # Increment/reset master chip counter
        _chipIdx += len(chiplist)
        if _chipIdx == num_in_prod:
            _chipIdx = 0

    # do the join if we spawned tasks
    if run_parallel:
        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done

    del _outsci, _outwht, _outctx, _hdrlist
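# An aside on the mp_ctx = multiprocessing.get_context('fork') calls above
# (background knowledge, not drizzlepac code): since Python 3.8 the default
# start method on macOS is 'spawn', which re-imports the main module in each
# child and requires every Process argument to be picklable. Requesting the
# 'fork' context explicitly restores the copy-the-parent behavior these
# functions rely on; it is available on POSIX systems only.
import multiprocessing

state = {'kernel': 'square'}  # plain module state, inherited by forked children


def show_kernel():
    # runs in the child; sees the parent's copy of `state` under 'fork'
    print(state['kernel'])


if __name__ == '__main__':
    ctx = multiprocessing.get_context('fork')  # POSIX only
    p = ctx.Process(target=show_kernel)
    p.start()
    p.join()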
def run_driz(imageObjectList, output_wcs, paramDict, single, build, wcsmap=None):
    """ Perform drizzle operation on input to create output.
    The input parameters originally were a list
    of dictionaries, one for each input, that matches the
    primary parameters for an ``IRAF`` `drizzle` task.

    This method would then loop over all the entries in the
    list and run `drizzle` for each entry.

    Parameters required for input in paramDict:
        build,single,units,wt_scl,pixfrac,kernel,fillval,
        rot,scale,xsh,ysh,blotnx,blotny,outnx,outny,data
    """
    # Ensure that input imageObject is a list
    if not isinstance(imageObjectList, list):
        imageObjectList = [imageObjectList]

    # Set up the versions info dictionary for output to PRIMARY header
    # The keys will be used as the name reported in the header, as-is
    _versions = {'AstroDrizzle': __version__,
                 'PyFITS': util.__fits_version__,
                 'Numpy': util.__numpy_version__}

    # Set sub-sampling rate for drizzling
    # stepsize = 2.0
    log.info(' **Using sub-sampling value of %s for kernel %s' %
             (paramDict['stepsize'], paramDict['kernel']))

    maskval = interpret_maskval(paramDict)

    outwcs = copy.deepcopy(output_wcs)

    # Check for existence of output file.
    if single == False and build == True and fileutil.findFile(
            imageObjectList[0].outputNames['outFinal']):
        log.info('Removing previous output product...')
        os.remove(imageObjectList[0].outputNames['outFinal'])

    # print out parameters being used for drizzling
    log.info("Running Drizzle to create output frame with WCS of: ")
    output_wcs.printwcs()

    # Will we be running in parallel?
    pool_size = util.get_pool_size(paramDict.get('num_cores'),
                                   len(imageObjectList))
    will_parallel = single and pool_size > 1
    if will_parallel:
        log.info('Executing %d parallel workers' % pool_size)
    else:
        if single:  # not yet an option for final drizzle, msg would confuse
            log.info('Executing serially')

    # Set parameters for each input and run drizzle on it here.
    #
    # Perform drizzling...

    numctx = 0
    for img in imageObjectList:
        numctx += img._nmembers
    _numctx = {'all': numctx}

    # if single:
    # Determine how many chips make up each single image
    for img in imageObjectList:
        for chip in img.returnAllChips(extname=img.scienceExt):
            plsingle = chip.outputNames['outSingle']
            if plsingle in _numctx:
                _numctx[plsingle] += 1
            else:
                _numctx[plsingle] = 1

    # Compute how many planes will be needed for the context image.
    _nplanes = int((_numctx['all'] - 1) / 32) + 1
    # For single drizzling or when context is turned off,
    # minimize to 1 plane only...
    if single or imageObjectList[0][1].outputNames['outContext'] in [None, '', ' ']:
        _nplanes = 1

    # An image buffer needs to be setup for converting the input
    # arrays (sci and wht) from FITS format to native format
    # with respect to byteorder and byteswapping.
    # This buffer should be reused for each input if possible.
    _outsci = _outwht = _outctx = _hdrlist = None
    if (not single) or \
       ((single) and (not will_parallel) and (not imageObjectList[0].inmemory)):
        # Note there are four cases/combinations for single drizzle alone here:
        # (not-inmem, serial), (not-inmem, parallel), (inmem, serial), (inmem, parallel)
        # _outsci = np.zeros((output_wcs._naxis2, output_wcs._naxis1), dtype=np.float32)
        _outsci = np.empty((output_wcs._naxis2, output_wcs._naxis1),
                           dtype=np.float32)
        _outsci.fill(maskval)
        _outwht = np.zeros((output_wcs._naxis2, output_wcs._naxis1),
                           dtype=np.float32)
        # initialize context to 3-D array but only pass appropriate plane to drizzle as needed
        _outctx = np.zeros((_nplanes, output_wcs._naxis2, output_wcs._naxis1),
                           dtype=np.int32)
        _hdrlist = []

    # Keep track of how many chips have been processed
    # For single case, this will determine when to close
    # one product and open the next.
    _chipIdx = 0

    # Remember the name of the 1st image that goes into this particular product
    # Ensure that the header reports the proper values for the start of the
    # exposure time used to make this; in particular, TIME-OBS and DATE-OBS.
    template = None

    #
    # Work on each image
    #
    subprocs = []
    for img in imageObjectList:

        chiplist = img.returnAllChips(extname=img.scienceExt)

        # How many inputs should go into this product?
        num_in_prod = _numctx['all']
        if single:
            num_in_prod = _numctx[chiplist[0].outputNames['outSingle']]

        # The name of the 1st image
        fnames = []
        for chip in chiplist:
            fnames.append(chip.outputNames['data'])

        if _chipIdx == 0:
            template = fnames
        else:
            template.extend(fnames)

        # Work each image, possibly in parallel
        if will_parallel:
            # use multiprocessing.Manager only if in parallel and in memory
            if img.inmemory:
                manager = multiprocessing.Manager()
                dproxy = manager.dict(img.virtualOutputs)  # copy & wrap it in proxy
                img.virtualOutputs = dproxy

            # parallelize run_driz_img (currently for separate drizzle only)
            p = multiprocessing.Process(
                target=run_driz_img,
                name='adrizzle.run_driz_img()',  # for err msgs
                args=(img, chiplist, output_wcs, outwcs, template, paramDict,
                      single, num_in_prod, build, _versions, _numctx, _nplanes,
                      _chipIdx, None, None, None, None, wcsmap))
            subprocs.append(p)
        else:
            # serial run_driz_img run (either separate drizzle or final drizzle)
            run_driz_img(img, chiplist, output_wcs, outwcs, template,
                         paramDict, single, num_in_prod, build, _versions,
                         _numctx, _nplanes, _chipIdx, _outsci, _outwht,
                         _outctx, _hdrlist, wcsmap)

        # Increment/reset master chip counter
        _chipIdx += len(chiplist)
        if _chipIdx == num_in_prod:
            _chipIdx = 0

    # do the join if we spawned tasks
    if will_parallel:
        mputil.launch_and_wait(subprocs, pool_size)  # blocks till all done

    del _outsci, _outwht, _outctx, _hdrlist