def Process ( self , entry ) :
    """ Fill data set """
    #
    ## == getting the next entry from the tree
    #
    if self.GetEntry ( entry ) <= 0 : return 0             ## RETURN
    #
    if not self.__progress and not self.__silence :
        self.__stat[0] = self.fChain.GetEntries()
        self.__logger.info ( "Selector(%s): processing TChain('%s') #entries: %d" % ( self.name , self.fChain.GetName() , self.total ) )
        ## decoration:
        from ostap.utils.progress_bar import ProgressBar
        self.__progress = ProgressBar ( max_value = self.total     ,
                                        silent    = self.__silence )

    if not self.__silence :
        if 0 == self.processed % 1000 or 0 == entry % 1000 or 0 == self.event() % 1000 :
            self.__progress.update_amount ( self.event () )

    self.__stat[1] += 1
    #
    ## == for more convenience
    #
    bamboo = self.fChain

    return self.fill ( bamboo )
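# =========================================================================
## Context sketch (illustration only, not from the original source):
#  `Process` is not called by user code; ROOT's TTree/TChain::Process drives
#  it once per entry. A hypothetical driver therefore looks like:
#
#  >>> chain = ...                     ## get the TTree/TChain
#  >>> chain.Process ( selector )      ## ROOT calls selector.Process ( entry )
#  >>> dataset = selector.data         ## pick up the filled RooDataSet
# =========================================================================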
def process(self, task, items, timeout=90000):
    if not isinstance(task, Task):
        raise TypeError("task argument needs to be a 'Task' instance")
    # --- Call the Local initialization
    task.initializeLocal()
    # --- Schedule all the jobs ....
    if self.mode == 'cluster':
        jobs = [self.server.submit(_prefunction,
                                   (_ppfunction, task, item),
                                   (),
                                   ('GaudiMP.Parallel', 'time')) for item in items]
        with ProgressBar(max_value=len(items), description="# Job execution:", silent=self.silent) as bar:
            for job in jobs:
                result, stat = job()
                task._mergeResults(result)
                self._mergeStatistics(stat)
                bar += 1
        self._printStatistics()
        self.server.print_stats()
    elif self.mode == 'multicore':
        start = time.time()
        jobs = self.pool.map_async(_ppfunction, zip([task for i in items], items))
        with ProgressBar(max_value=len(items), description="# Job execution:", silent=self.silent) as bar:
            for result, stat in jobs.get(timeout):
                task._mergeResults(result)
                self._mergeStatistics(stat)
                bar += 1
        end = time.time()
        if not self.silent:
            self._printStatistics()
            logger.info('Time elapsed since server creation %f' % (end - start))
    # --- Call the Local Finalize
    task.finalize()
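# =========================================================================
## A minimal Task sketch (an assumption inferred from the calls above, not
#  the definitive base-class API): the manager calls `initializeLocal` once,
#  evaluates `process ( item )` remotely for every item, folds each result
#  back via `_mergeResults` and finally calls `finalize`. `SquareTask` and
#  the `WorkManager` usage below are hypothetical.
#
#  >>> class SquareTask ( Task ) :
#  ...     def initializeLocal ( self )          : self.output = 0
#  ...     def process         ( self , item )   : return item * item
#  ...     def _mergeResults   ( self , result ) : self.output += result
#  ...     def finalize        ( self )          : print ( 'sum:' , self.output )
#  >>> manager = WorkManager ( ncpus = 4 )       ## hypothetical manager
#  >>> manager.process ( SquareTask () , range ( 100 ) )
# =========================================================================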
def process(self, task, items, timeout=90000):
    if not isinstance(task, Task):
        raise TypeError("task argument needs to be a 'Task' instance")
    # --- Call the Local initialization
    task.initialize_local()
    # --- Schedule all the jobs ....
    start = time.time()
    from itertools import repeat, count
    jobs = self.pool.map_async(_ppfunction, zip(repeat(task), count(), items))
    with ProgressBar(max_value=len(items), description="# Job execution:", silent=self.silent) as bar:
        for result, stat in jobs.get(timeout):
            task.merge_results(result)
            self.stats += stat
            bar += 1
    end = time.time()
    if not self.silent:
        self.print_statistics()
        logger.info('Time elapsed since server creation %f' % (end - start))
    # --- Call the Local Finalize
    task.finalize()
    return task.results()
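# =========================================================================
## A small self-contained demo (illustration only) of the argument packing
#  used above: `zip(repeat(task), count(), items)` yields one
#  (task, jobid, item) triple per item, so every job carries its own index.
from itertools import repeat, count
items = ['a', 'b', 'c']
for task, jobid, item in zip(repeat('<task>'), count(), items):
    print(jobid, item)                 ## -> 0 a , 1 b , 2 c
# =========================================================================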
def process(self, task, items, timeout=90000):
    if not isinstance(task, Task):
        raise TypeError("task argument needs to be a 'Task' instance")
    # --- Call the Local initialization
    task.initializeLocal()
    # --- Schedule all the jobs ....
    if self.mode == 'cluster':
        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar(max_value=len(items), silent=self.silent) as bar:
            jobs = self.pool.uimap(_ppfunction, zip([task for i in items], items))
            ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (), ('ROOT','Ostap.ParallelPathos')) for item in items]
            ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (), ('Ostap.Parallel','time')) for item in items]
            ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (_ppfunction,), ('Ostap','time')) for item in items]
            for result, stat in jobs:
                bar += 1
                task._mergeResults(result)
                self._mergeStatistics(stat)
        self._printStatistics()
        self.pp_stats()
    elif self.mode == 'multicore':
        start = time.time()
        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar(max_value=len(items), silent=self.silent) as bar:
            jobs = self.pool.uimap(_ppfunction, zip([task for i in items], items))
            for result, stat in jobs:
                bar += 1
                task._mergeResults(result)
                self._mergeStatistics(stat)
        end = time.time()
        self._printStatistics()
        logger.info('Time elapsed since server creation %f' % (end - start))
    # --- Call the Local Finalize
    task.finalize()
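# =========================================================================
## A minimal demo (illustration only, assuming the `pathos` package is
#  available) of the unordered map used above: `uimap` yields each result as
#  soon as its job finishes, which is why the progress bar can advance per
#  completed job rather than in submission order.
from pathos.multiprocessing import ProcessPool

if '__main__' == __name__:
    pool = ProcessPool(2)
    for r in pool.uimap(lambda x: x * x, range(5)):
        print(r)                       ## results arrive in completion order
    pool.close()
    pool.join()
# =========================================================================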
def __process_task ( self , task , chunks , **kwargs ) :
    """Helper internal method to process the task with chunks of data
    """
    from timeit import default_timer as _timer
    start = _timer ()

    ## initialize the task
    task.initialize_local ()

    ## mergers for statistics
    merged_stat    = StatMerger ()
    merged_stat_pp = StatMerger ()

    ## start index for jobs
    index = 0

    ## total number of jobs
    njobs = sum ( len ( c ) for c in chunks )

    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar ( max_value = njobs , silent = self.silent ) as bar :
        while chunks :
            chunk     = chunks.pop ( 0 )
            jobs_args = zip ( repeat ( task ) , count ( index ) , chunk )
            for jobid , result , stat in self.iexecute ( task_executor , jobs_args , progress = False ) :
                ## merge statistics
                merged_stat += stat
                ## merge/collect results
                task.merge_results ( result , jobid )
                bar += 1
            index += len ( chunk )

    pp_stat = self.get_pp_stat ()
    if pp_stat : merged_stat_pp += pp_stat

    ## finalize the task
    task.finalize ()

    self.print_statistics ( merged_stat_pp , merged_stat , _timer () - start )
    ##
    return task.results ()
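# =========================================================================
## A self-contained demo (illustration only) of the chunk bookkeeping above,
#  extending the triple-packing shown earlier: `count ( index )` continues
#  the global job numbering across chunks, so job ids stay unique even
#  though every chunk is zipped separately.
from itertools import repeat , count
chunks = [ [ 'a' , 'b' ] , [ 'c' , 'd' ] ]
index  = 0
for chunk in chunks :
    for task , jobid , item in zip ( repeat ( '<task>' ) , count ( index ) , chunk ) :
        print ( jobid , item )         ## -> 0 a , 1 b , 2 c , 3 d
    index += len ( chunk )
# =========================================================================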
def __process(self, task, chunks, **kwargs):
    """Helper internal method to process the task with chunks of data
    """
    from timeit import default_timer as _timer
    start = _timer()
    if isinstance(task, Task):
        kwargs.pop('merger', None)
        return self.__process_task(task, chunks, **kwargs)
    ## mergers for statistics
    merged_stat = StatMerger()
    merged_stat_pp = StatMerger()
    merger = kwargs.pop('merger', TaskMerger())
    njobs = sum(len(c) for c in chunks)
    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar(max_value=njobs, silent=self.silent) as bar:
        while chunks:
            chunk = chunks.pop()
            from itertools import repeat, count
            jobs_args = zip(repeat(task), count(), chunk)
            self.pool.restart(True)
            jobs = self.pool.uimap(func_executor, jobs_args)
            del jobs_args
            for result, stat in jobs:
                bar += 1
                merged_stat += stat
                merger += result
                del result
                del stat
            merged_stat_pp += self.get_pp_stat()
            self.pool.close()
            self.pool.join()
    ## finalize the task (plain functions have nothing to finalize)
    if hasattr(task, 'finalize'):
        task.finalize()
    self.print_statistics(merged_stat_pp, merged_stat, _timer() - start)
    ##
    return merger.results
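# =========================================================================
## A toy sketch (an assumption, not the actual TaskMerger) of the merger
#  protocol relied upon above: `merger += result` folds every job result
#  into the accumulated state and `merger.results` exposes the merged value.
class SumMerger(object):
    """Toy merger: accumulate job results by summation"""
    def __init__(self):
        self.results = 0               ## merged value, cf. `merger.results`
    def __iadd__(self, result):        ## cf. `merger += result`
        self.results += result
        return self

merger = SumMerger()
for r in (1, 2, 3):
    merger += r
print(merger.results)                  ## -> 6
# =========================================================================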
def add_files(self, files):
    """ Add files/patterns to data collector
    """
    if isinstance(files, str):
        files = [files]
    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar(max_value=len(files), silent=self.silent) as bar:
        self.progress = bar
        for f in files:
            if 0 >= self.maxfiles:
                self.treatFile(f)
            elif len(self.files) < self.maxfiles:
                self.treatFile(f)
            else:
                logger.debug('Maxfiles limit is reached %s ' % self.maxfiles)
                break
def _iter_cuts_ ( self , cuts , first = 0 , last = _large , progress = False ) :
    """Iterator over ``good events'' in TTree/TChain:
    >>> tree = ... # get the tree
    >>> for i in tree.withCuts ( 'pt>5' ) : print i.y
    Attention: TTree::GetEntry is already invoked for accepted events,
    no need for a second call
    """
    #
    last = min ( last , len ( self ) )
    pit  = cpp.Ostap.PyIterator ( self , cuts , first , last )
    if not pit.ok () : raise TypeError ( "Invalid Formula: %s" % cuts )
    #
    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar ( min_value = first , max_value = last , silent = not progress ) as bar :

        step = 13.0 * max ( bar.width , 101 ) / ( last - first )

        _t = pit.tree ()
        _o = _t
        while valid_pointer ( _t ) :
            yield _t
            _t = pit.next ()                     ## advance to the next entry
            if progress :
                current = pit.current () - 1     ## get the current entry index
                if not _t                                           \
                       or _t != _o                                  \
                       or current - first < 120                     \
                       or last - current  < 120                     \
                       or 0 == current % 100000                     \
                       or 0 == int ( step * ( current - first ) ) % 5 :
                    ## show progress bar
                    bar.update_amount ( current )
            _o = _t
        if progress : bar.update_amount ( last )

    del pit
    self.GetEntry ( 0 )
def __process_task(self, task, chunks, **kwargs):
    """Helper internal method to process the task with chunks of data
    """
    assert isinstance(task, Task), 'Invalid task type %s' % type(task)
    from timeit import default_timer as _timer
    start = _timer()
    ## initialize the task
    task.initialize_local()
    ## mergers for statistics
    merged_stat = StatMerger()
    merged_stat_pp = StatMerger()
    njobs = sum(len(c) for c in chunks)
    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar(max_value=njobs, silent=self.silent) as bar:
        while chunks:
            chunk = chunks.pop()
            from itertools import repeat, count
            jobs_args = zip(repeat(task), count(), chunk)
            self.pool.restart(True)
            jobs = self.pool.uimap(task_executor, jobs_args)
            del jobs_args
            for result, stat in jobs:
                bar += 1
                merged_stat += stat
                task.merge_results(result)
                del result
                del stat
            merged_stat_pp += self.get_pp_stat()
            self.pool.close()
            self.pool.join()
    task.finalize()
    self.print_statistics(merged_stat_pp, merged_stat, _timer() - start)
    ##
    return task.results()
def add_files ( self , patterns ) :
    """ Add files/patterns to data collector
    """
    if isinstance ( patterns , str ) : patterns = [ patterns ]

    _files = set ()
    for pattern in patterns :
        _added = False
        for p in protocols :
            if p in pattern :
                if not pattern in self.files : _files.add ( pattern )
                _added = True
                break
        if not _added :
            for f in glob.iglob ( pattern ) :
                if not f in self.files : _files.add ( f )

    if not self.silent :
        logger.info ( 'Loading: %s #patterns/files: %s/%d' % ( self.description , len ( patterns ) , len ( _files ) ) )

    ## update list of patterns
    self.patterns += patterns

    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar ( max_value = len ( _files ) , silent = self.silent ) as bar :
        self.progress = bar
        for f in _files :
            if   0 >= self.maxfiles                 : self.treatFile ( f )
            elif len ( self.files ) < self.maxfiles : self.treatFile ( f )
            else :
                logger.debug ( 'Maxfiles limit is reached %s ' % self.maxfiles )
                break

    if not self.silent :
        logger.info ( 'Loaded: %s' % self )
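# =========================================================================
## A self-contained demo (illustration only; the real `protocols` list is
#  defined elsewhere in the package) of the pattern handling above: names
#  carrying a protocol prefix are kept verbatim, anything else is expanded
#  locally via glob.
import glob
protocols = [ 'root://' , 'http://' , 'https://' ]   ## assumed protocol list
def expand ( pattern ) :
    if any ( p in pattern for p in protocols ) : return [ pattern ]
    return list ( glob.iglob ( pattern ) )
print ( expand ( 'root://eos.host//data/f.root' ) )  ## kept as-is
print ( expand ( '*.root' ) )                        ## expanded locally
# =========================================================================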
def add_files(self, files):
    """ Add files/patterns to data collector
    """
    if isinstance(files, str):
        files = [files]
    ## eliminate duplicates and sort
    files = list(set(files))
    files.sort()
    nf = len(files)
    max_value = nf if 0 >= self.maxfiles else min(nf, self.maxfiles)
    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar(max_value=max_value, silent=self.silent) as bar:
        self.progress = bar
        for f in files:
            if 0 >= self.maxfiles:
                self.treatFile(f)
            elif len(self.files) < self.maxfiles:
                self.treatFile(f)
            else:
                logger.debug('Maxfiles limit is reached %s ' % self.maxfiles)
                break
def _tc_call_ ( self , first = 0 , last = -1 , cuts = None , progress = False ) :
    """Iterator over ``good events'' in TTree/TChain:
    >>> tree = ... # get the tree
    >>> for i in tree ( 0 , 100 , 'pt>5' ) : print i.y
    """
    #
    if last < 0 : last = ROOT.TTree.kMaxEntries
    last = min ( last , len ( self ) )

    from ostap.utils.progress_bar import ProgressBar
    with ProgressBar ( min_value = first , max_value = last , silent = not progress ) as bar :

        step = 13.0 * max ( bar.width , 101 ) / ( last - first )

        pit = 1
        if cuts :
            pit = cpp.Ostap.PyIterator ( self , cuts , first , last )
            if not pit.ok () : raise TypeError ( "Invalid Formula: %s" % cuts )
            #
            _t = pit.tree ()
            _o = _t
            while valid_pointer ( _t ) :
                yield _t                             ## YIELD
                _t = pit.next ()                     ## advance to the next entry
                if progress :
                    current = pit.current () - 1     ## get the current entry index
                    if not _t                                           \
                           or _t != _o                                  \
                           or current - first < 120                     \
                           or last - current  < 120                     \
                           or 0 == current % 100000                     \
                           or 0 == int ( step * ( current - first ) ) % 5 :
                        ## show progress bar
                        bar.update_amount ( current )
                _o = _t
        else :
            ## just explicit loop
            for current in range ( first , last + 1 ) :
                if progress :
                    if current - first < 120                            \
                           or last - current  < 120                     \
                           or 0 == current % 100000                     \
                           or 0 == int ( step * ( current - first ) ) % 5 :
                        bar.update_amount ( current )
                if 0 >= self.GetEntry ( current ) : break
                yield self                           ## YIELD!

        if progress : bar.update_amount ( last )

    del pit
    self.GetEntry ( 0 )
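# =========================================================================
## Usage sketch (illustration only; tree and branch names are hypothetical):
#  with `cuts` the C++ Ostap.PyIterator pre-filters the entries, otherwise
#  the plain Python loop over [first, last] is used; `progress = True`
#  draws the throttled progress bar in either branch.
#
#  >>> tree = ...                                    ## get the TTree/TChain
#  >>> for entry in tree ( 0 , 1000 , 'pt>5' , progress = True ) :
#  ...     print entry.y                             ## no extra GetEntry needed
# =========================================================================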
class SelectorWithVars(SelectorWithCuts) :
    """Create and fill the basic dataset for RooFit

    - Define the list of ``variables'' for the selector:

    >>> variables = [ ... ]

    Add a variable 'my_name1' from the tree/chain:

    >>> variables += [ #  name        descriptor          min-value , max-value
    ...     Variable ( 'my_name1' , 'my_description1' , low , high ) ]

    Get a variable 'my_name' from the tree/chain using an accessor function,
    e.g. rescale it on-the-fly:

    >>> variables += [ #  name        descriptor          min-value , max-value , access function
    ...     Variable ( 'my_name2' , 'my_description2' , low , high , lambda s : s.my_name2/1000 ) ]

    Use a less trivial expression:

    >>> variables += [ #  name        descriptor          min-value , max-value , access function
    ...     Variable ( 'my_name3' , 'my_description3' , low , high , lambda s : s.var1+s.var2 ) ]

    Any callable that gets a TChain/TTree and evaluates to double will do
    (a useful case: e.g. it could be a TMVA reader):

    >>> def myvar ( chain ) : ...
    >>> variables += [ #  name        descriptor          min-value , max-value , access function
    ...     Variable ( 'my_name4' , 'my_description4' , low , high , myvar ) ]

    Use an already booked variable:

    >>> v5 = ROOT.RooRealVar ( 'my_name5' )
    >>> variables += [ Variable ( v5 , accessor = lambda s : s.var5 ) ]

    Add an already booked variable:

    >>> v6 = ROOT.RooRealVar ( 'my_name6' )
    >>> variables += [ Variable ( v6 ) ] ## get variable 'my_name6'

    - Finally create the selector

    >>> selector = SelectorWithVars (
    ...     variables ,
    ...     selection = ' chi2vx<30 && pt>2*GeV ' ) ## filtering

    - Use the selector to fill a RooDataSet

    >>> chain = ...
    >>> chain.process ( selector )

    - Get the dataset from the selector

    >>> dataset = selector.data
    """
    ## constructor
    def __init__ ( self               ,
                   variables          ,  ## list of variables
                   selection          ,  ## Tree-selection
                   cuts      = None   ,
                   name      = ''     ,
                   fullname  = ''     ,
                   silence   = False  ) :

        if not name :
            from ostap.core.core import dsID
            name = dsID()

        if not fullname : fullname = name

        self.__name = name
        #
        ## create the logger
        #
        from ostap.logger.logger import getLogger
        self.__logger = logger ## getLogger ( fullname )
        #
        self.__silence = silence

        ## assert 0 < len(variables) , "Empty list of variables"
        #
        ## instantiate the base class
        #
        SelectorWithCuts.__init__ ( self , selection ) ## initialize the base

        self.__cuts      = cuts
        self.__variables = []
        self.__varset    = ROOT.RooArgSet()

        self.__triv_vars = True
        vvars = set()
        for v in variables :

            vv = v
            if   isinstance ( v , str              ) : vv = Variable (  v )
            elif isinstance ( v , ROOT.RooAbsReal  ) : vv = Variable (  v )
            elif isinstance ( v , ( tuple , list ) ) : vv = Variable ( *v )
            elif isinstance ( v , dict             ) : vv = Variable ( **v )
            elif isinstance ( v , Variable         ) : vv = v

            assert isinstance ( vv , Variable ), 'Invalid variable %s/%s' % ( vv , type ( vv ) )

            self.__variables.append ( vv     )
            self.__varset   .add    ( vv.var )
            #
            if   vv.trivial and vv.name == vv.formula : pass
            elif vv.formula                           : pass
            else                                      : self.__triv_vars = False
            #
            vvars.add ( vv )

        self.__variables = tuple ( self.__variables )

        self.__triv_sel  = valid_formula ( selection , self.__varset )
        triv_cuts        = not cuts

        self.__trivial = self.__triv_vars and self.__triv_sel and triv_cuts
        if not silence :
            tv = allright ( 'True' ) if self.__triv_vars else attention ( 'False' )
            ts = allright ( 'True' ) if self.__triv_sel  else attention ( 'False' )
            tc = allright ( 'True' ) if triv_cuts        else attention ( 'False' )
            self.__logger.info ( "Suitable for fast processing: variables:%s, selection:%s, py-cuts:%s" % ( tv , ts , tc ) )

        if not self.__silence :
            nl = 0
            dl = 0
            for v in self.__variables :
                nl = max ( nl , len ( v.name        ) )
                dl = max ( dl , len ( v.description ) )
            dl = max ( dl , len ( 'Description' ) + 2 )
            nl = max ( nl , len ( 'Variable'    ) + 2 )

            line1    = '\n# | %%%ds | %%-%ds | min / max | Trivial? | '       % ( nl , dl )
            line2    = '\n# | %%%ds | %%-%ds | %%+11.3g / %%-+11.3g | %%s | ' % ( nl , dl )
            the_line = 'Booked %d variables:' % len ( self.variables )
            sep      = '\n# +%s+%s+%s+%s+' % ( ( nl + 2 ) * '-' ,
                                               ( dl + 2 ) * '-' ,
                                               27 * '-'         ,
                                               10 * '-'         )
            the_line += sep
            the_line += line1 % ( 'Variable' , 'Description' )
            the_line += sep
            for v in self.__variables :
                trivial   = allright ( 'True' ) + 4 * ' ' if v.trivial else attention ( 'False' ) + 3 * ' '
                fmt       = line2 % ( v.name , v.description , v.minmax[0] , v.minmax[1] , trivial )
                the_line += fmt
            the_line += sep
            self.__logger.info ( the_line )

        ## Book dataset
        self.__data = ROOT.RooDataSet (
            ##
            self.name ,
            fullname  ,
            ##
            self.__varset
            )
        #
        ## it is still very puzzling for me: should this line be here at all??
        ROOT.SetOwnership ( self.__data , False )

        self.__progress = None
        from collections import defaultdict
        self.__skip     = defaultdict ( int )
        self.__notifier = None
        self.__stat     = [ 0 , 0 , 0 ]

    @property
    def name ( self ) :
        """``name'' - the name of the selector/dataset"""
        return self.__name

    @property
    def data ( self ) :
        """``data'' - the dataset"""
        return self.__data
    @data.setter
    def data ( self , dataset ) :
        assert isinstance ( dataset , ROOT.RooAbsData ), \
               "Incorrect type of data %s/%s " % ( dataset , type ( dataset ) )
        self.__logger.debug ( "Selector(%s), add dataset %s" % ( self.__name , dataset ) )
        self.__data = dataset

    @property
    def variables ( self ) :
        """``variables'' - the list/tuple of variables (cleared in Terminate)"""
        return self.__variables

    @property
    def varset ( self ) :
        """``varset'' : the structure of the RooDataSet"""
        return self.__varset

    @property
    def morecuts ( self ) :
        """``morecuts'' - additional cuts to be applied in the selection"""
        return self.__cuts

    @property
    def trivial_vars ( self ) :
        """``trivial_vars'' : are all variables ``trivial'' (suitable for fast processing)?"""
        return self.__triv_vars

    @property
    def trivial_sel ( self ) :
        """``trivial_sel'' : is the selection ``trivial'' (suitable for fast processing)?"""
        return self.__triv_sel

    @property
    def trivial ( self ) :
        """``trivial'' : are variables/selection/cuts ``trivial'' (suitable for fast processing)?"""
        return self.__trivial

    @property
    def skip ( self ) :
        """``skip'' : dictionary of skipped entries"""
        return self.__skip

    @property
    def skipped ( self ) :
        """``skipped'' : total number of skipped entries"""
        return self.__stat[2]

    @property
    def processed ( self ) :
        """``processed'' : number of processed events (after cuts)"""
        return self.__stat[1]

    @property
    def total ( self ) :
        """``total'' : total number of processed events (before cuts)"""
        return self.__stat[0]

    @property
    def stat ( self ) :
        """``stat'' : total/processed/skipped events"""
        return tuple ( self.__stat )
    @stat.setter
    def stat ( self , value ) :
        assert 3 <= len ( value ) , 'Invalid "value":%s' % str ( value )
        self.__stat[0] = value[0]
        self.__stat[1] = value[1]
        self.__stat[2] = value[2]

    ## get the dataset
    def dataset ( self ) :
        """ Get the data-set """
        return self.__data

    # =========================================================================
    ## the only one actually important method
    def Process ( self , entry ) :
        """ Fill data set """
        #
        ## == getting the next entry from the tree
        #
        if self.GetEntry ( entry ) <= 0 : return 0             ## RETURN
        #
        if not self.__progress and not self.__silence :
            self.__stat[0] = self.fChain.GetEntries()
            self.__logger.info ( "Selector(%s): processing TChain('%s') #entries: %d" % ( self.name , self.fChain.GetName() , self.total ) )
            ## decoration:
            from ostap.utils.progress_bar import ProgressBar
            self.__progress = ProgressBar ( max_value = self.total     ,
                                            silent    = self.__silence )

        if not self.__silence :
            if 0 == self.processed % 1000 or 0 == entry % 1000 or 0 == self.event() % 1000 :
                self.__progress.update_amount ( self.event () )

        self.__stat[1] += 1
        #
        ## == for more convenience
        #
        bamboo = self.fChain

        return self.fill ( bamboo )

    # =========================================================================
    ## fill it!
    def fill ( self , bamboo ) :
        """The actual processing of the given ``bamboo''
        Note that this method is independent of TTree/TChain and can be used directly.
        One just needs to ensure that:
        - the 'accessor functions' for the variables and the 'cuts' agree with the type of ``bamboo''
        """
        ## apply cuts (if needed)
        if self.__cuts and not self.__cuts ( bamboo ) : return 0

        ## loop over all variables
        for v in self.__variables :

            var         = v.var          ## The variable
            vmin , vmax = v.minmax       ## min/max range
            vfun        = v.accessor     ## accessor function

            ## use the accessor function
            value = vfun ( bamboo )
            if not vmin <= value <= vmax :       ## MUST BE IN RANGE!
                self.__skip[v.name] += 1         ## SKIP EVENT
                self.__stat[2]      += 1         ## SKIP EVENT
                return 0                         ## RETURN

            var.setVal ( value )

        self.__data.add ( self.__varset )

        return 1

    # =========================================================================
    ## ``callable'' interface
    def __call__ ( self , entry ) :
        """``callable'' interface to the Selector"""
        return self.fill ( entry )

    ## termination
    def Terminate ( self ) :
        #
        if self.__progress :
            self.__progress.end ()
        #
        ## Aborted?
        if 0 != self.GetAbort () :
            self.__logger.fatal ( 'Selector(%s): process has been aborted!' % self.__name )

            self.__data      = None
            del self.__varset
            del self.__variables
            self.__varset    = ()
            self.__variables = ()

            return  ## RETURN

        ## get total number of input events from the base class
        self.__stat[0] = self.event ()

        if not self.__silence :
            skipped = 'Skipped:%d' % self.skipped
            skipped = '/' + attention ( skipped ) if self.skipped else ''
            cuts    = allright ( '"%s"' % self.cuts () ) if self.trivial_sel else attention ( '"%s"' % self.cuts () )
            self.__logger.info (
                'Selector(%s): Events Total:%d/Processed:%d%s CUTS: %s' % (
                self.__name , self.total , self.processed , skipped , cuts ) )
            self.__logger.info ( 'Selector(%s): dataset created:%s' % ( self.__name , self.__data ) )

        if self.__data and not self.__silence :
            vars = []
            for v in self.__variables :
                s    = self.__data.statVar ( v.name )
                mnmx = s.minmax ()
                mean = s.mean   ()
                rms  = s.rms    ()
                r    = ( v.name                              ,  ## 0
                         v.description                       ,  ## 1
                         ( '%+.5g' % mean.value() ).strip()  ,  ## 2
                         ( '%.5g'  % rms          ).strip()  ,  ## 3
                         ( '%+.5g' % mnmx[0]      ).strip()  ,  ## 4
                         ( '%+.5g' % mnmx[1]      ).strip()  )  ## 5
                s = self.__skip [ v.name ]
                if s : skip = '%-d' % s
                else : skip = ''
                r += skip ,                                     ## 6
                vars.append ( r )

            vars.sort ()

            name_l = len ( 'Variable'    ) + 2
            desc_l = len ( 'Description' ) + 2
            mean_l = len ( 'mean'        ) + 2
            rms_l  = len ( 'rms'         ) + 2
            min_l  = len ( 'min'         ) + 2
            max_l  = len ( 'max'         ) + 2
            skip_l = len ( 'Skip'        )
            for v in vars :
                name_l = max ( name_l , len ( v[0] ) )
                desc_l = max ( desc_l , len ( v[1] ) )
                mean_l = max ( mean_l , len ( v[2] ) )
                rms_l  = max ( rms_l  , len ( v[3] ) )
                min_l  = max ( min_l  , len ( v[4] ) )
                max_l  = max ( max_l  , len ( v[5] ) )
                skip_l = max ( skip_l , len ( v[6] ) )

            sep = '# -%s+%s+%s+%s+%s-' % ( ( name_l          + 2 ) * '-' ,
                                           ( desc_l          + 2 ) * '-' ,
                                           ( mean_l + rms_l  + 5 ) * '-' ,
                                           ( min_l  + max_l  + 5 ) * '-' ,
                                           ( skip_l          + 2 ) * '-' )
            fmt = '# %%%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds | %%-%ds ' % ( name_l ,
                                                                                     desc_l ,
                                                                                     mean_l ,
                                                                                     rms_l  ,
                                                                                     min_l  ,
                                                                                     max_l  ,
                                                                                     skip_l )

            report  = 'Dataset(%s) created:' % self.__name
            report += ' ' + allright ( '%s entries, %s variables' % ( len ( self.__data ) , len ( self.variables ) ) )
            if self.trivial_vars : report += ' Vars:' + allright  ( 'trivial'     ) + ';'
            else                 : report += ' Vars:' + attention ( 'non-trivial' ) + ';'
            if self.trivial_sel  : report += ' Cuts:' + allright  ( 'trivial'     ) + ';'
            else                 : report += ' Cuts:' + attention ( 'non-trivial' ) + ';'
            if not self.__cuts   : report += ' ' + allright  ( 'no py-cuts'   )
            else                 : report += ' ' + attention ( 'with py-cuts' )

            header  = fmt % ( 'Variable'    ,
                              'Description' ,
                              'mean'        ,
                              'rms'         ,
                              'min'         ,
                              'max'         ,
                              'skip'        )
            report += '\n' + sep
            report += '\n' + header
            report += '\n' + sep
            for v in vars :
                line    = fmt % ( v[0] , v[1] , v[2] , v[3] , v[4] , v[5] , attention ( v[6] ) )
                report += '\n' + line
            report += '\n' + sep
            self.__logger.info ( report )

        if not len ( self.__data ) :
            skip = 0
            for k , v in self.__skip.items() : skip += v
            self.__logger.warning ( "Selector(%s): empty dataset! Total:%s/Processed:%s/Skipped:%d" % (
                self.__name , self.total , self.processed , skip ) )

        ## attention: delete these
        del self.__varset
        del self.__variables
        self.__varset    = ()
        self.__variables = ()

    def Init ( self , chain ) :
        #
        result = SelectorWithCuts.Init ( self , chain )
        if self.__progress and not self.__silence :
            self.__progress.update_amount ( self.event () )
        #
        return result

    def Begin ( self , tree = None ) :
        ##
        result = SelectorWithCuts.Begin ( self , tree )
        if self.__progress and not self.__silence :
            self.__progress.update_amount ( self.event () )
        return result

    #
    def SlaveBegin ( self , tree ) :
        #
        result = SelectorWithCuts.SlaveBegin ( self , tree )
        #
        if self.__progress and not self.__silence :
            self.__progress.update_amount ( self.event () )
        #
        self.__stat[0] = tree.GetEntries()
        #
        if self.__notifier :
            self.__notifier.exit()
            del self.__notifier

        self.__notifier = Ostap.Utils.Notifier ( tree )
        for v in self.__variables :
            if isinstance ( v.accessor , ROOT.TObject ) :
                self.__notifier.add ( v.accessor )

        return result

    #
    def Notify ( self ) :
        #
        result = SelectorWithCuts.Notify ( self )
        if self.__progress and not self.__silence :
            self.__progress.update_amount ( self.event () )
        return result

    def SlaveTerminate ( self ) :
        #
        result = SelectorWithCuts.SlaveTerminate ( self )
        if self.__progress and not self.__silence :
            self.__progress.update_amount ( self.event () )
        if self.__notifier :
            self.__notifier.exit()
            self.__notifier = None
        return result
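# =========================================================================
## An end-to-end usage sketch (illustration only; branch names and limits
#  are hypothetical), condensing the recipe from the class docstring above:
#
#  >>> variables = [ Variable ( 'pt'   , 'transverse momentum' , 0 , 100 ) ,
#  ...               Variable ( 'mass' , 'mass in GeV'         , 0 , 10  ,
#  ...                          lambda s : s.m / 1000.0 ) ]  ## rescale on-the-fly
#  >>> selector  = SelectorWithVars ( variables , selection = 'chi2vx<30' )
#  >>> chain     = ...                        ## get the TTree/TChain
#  >>> chain.process ( selector )             ## fill the dataset
#  >>> dataset   = selector.data              ## the resulting RooDataSet
# =========================================================================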
def __process_func ( self , task , chunks , **kwargs ) :
    """Helper internal method for parallel processing of a plain function
    with chunks of data
    """
    from ostap.utils.cidict import cidict
    my_args = cidict ( kwargs )

    from timeit import default_timer as _timer
    start = _timer ()

    init      = my_args.pop ( 'init'      , None )
    merger    = my_args.pop ( 'merger'    , None )
    collector = my_args.pop ( 'collector' , None )

    ## mergers for statistics & results
    if   not merger and not collector :
        logger.warning ( "Neither ``merger'' nor ``collector'' is specified for merging!" )
    elif     merger and     collector :
        logger.warning ( "Both ``merger'' and ``collector'' are specified for merging!" )

    ## mergers for statistics
    merged_stat    = StatMerger ()
    merged_stat_pp = StatMerger ()

    ## start index for the jobs
    index = 0

    ## initialize the results
    results = init

    from ostap.utils.progress_bar import ProgressBar
    ## total number of jobs
    njobs = sum ( len ( c ) for c in chunks )
    with ProgressBar ( max_value = njobs , silent = self.silent ) as bar :
        while chunks :
            chunk     = chunks.pop ( 0 )
            jobs_args = zip ( repeat ( task ) , count ( index ) , chunk )
            ## call the actual job-handling method
            for jobid , result , stat in self.iexecute ( func_executor , jobs_args , progress = False ) :
                merged_stat += stat
                ## merge results if a merger or collector is provided
                if   merger    : results = merger    ( results , result )
                elif collector : results = collector ( results , result , jobid )
                bar += 1
            index += len ( chunk )

    pp_stat = self.get_pp_stat ()
    if pp_stat : merged_stat_pp += pp_stat

    ## print statistics
    self.print_statistics ( merged_stat_pp , merged_stat , _timer () - start )
    ##
    return results
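# =========================================================================
## A self-contained demo (illustration only) of the two merging styles
#  accepted above: a `merger` is a binary fold over (accumulated, result),
#  while a `collector` additionally receives the job id, e.g. to keep the
#  results keyed per job.
results = 0
def merger ( acc , result ) :            ## results = merger ( results , result )
    return acc + result
for r in ( 1 , 2 , 3 ) : results = merger ( results , r )
print ( results )                        ## -> 6

results = {}
def collector ( acc , result , jobid ) : ## collector also gets the job id
    acc [ jobid ] = result
    return acc
for jobid , r in enumerate ( ( 'x' , 'y' ) ) : results = collector ( results , r , jobid )
print ( results )                        ## -> {0: 'x', 1: 'y'}
# =========================================================================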