def __init__(self,
             variables,          ## list of variables
             selection,          ## Tree-selection
             cuts=None,
             name='',
             fullname='',
             silence=False):
    """Book an empty RooDataSet and prepare the selector that is going to fill it.

    Parameters:
    - variables : non-empty sequence of variable descriptions; each item is either
                  a `Variable`, or a string / `ROOT.RooAbsReal` (passed to `Variable`
                  as the single argument), or a tuple/list (expanded as positional
                  arguments of `Variable`), or a dict (expanded as keyword arguments)
    - selection : tree-level selection, forwarded to `SelectorWithCuts.__init__`
    - cuts      : optional python-level cuts; here it is only stored and
                  tested for truthiness
    - name      : dataset name; when empty, it is generated by `dsID()`
    - fullname  : dataset title; defaults to `name`
    - silence   : when true, the informational printout is suppressed

    Raises AssertionError when `variables` is empty or when an item cannot be
    converted into `Variable`.
    """
    if not name:
        from ostap.core.core import dsID
        name = dsID()

    if not fullname:
        fullname = name

    self.__name = name
    #
    ## create the logger
    #
    from ostap.logger.logger import getLogger
    self.__logger = logger  ## getLogger ( fullname )
    #
    self.__silence = silence

    ##
    assert 0 < len(variables), "Empty list of variables"
    #
    ## instantiate the base class
    #
    SelectorWithCuts.__init__(self, selection)  ## initialize the base

    self.__cuts = cuts
    self.__variables = []
    self.__varset = ROOT.RooArgSet()

    self.__triv_vars = True
    for v in variables:

        ## convert whatever has been supplied into Variable
        if isinstance(v, (str, ROOT.RooAbsReal)):
            vv = Variable(v)
        elif isinstance(v, (tuple, list)):
            vv = Variable(*v)
        elif isinstance(v, dict):
            vv = Variable(**v)
        else:
            vv = v

        assert isinstance(vv, Variable), 'Invalid variable %s/%s' % (vv, type(vv))

        self.__variables.append(vv)
        self.__varset.add(vv.var)
        #
        ## the check must look at the converted Variable `vv`:
        ## the raw item `v` can be a string, a tuple or a dict
        ## and then it has no `trivial`, `name` or `formula` attributes
        if not ((vv.trivial and vv.name == vv.formula) or vv.formula):
            self.__triv_vars = False

    self.__variables = tuple(self.__variables)

    self.__triv_sel = valid_formula(selection, self.__varset)
    triv_cuts = not cuts

    self.__trivial = self.__triv_vars and self.__triv_sel and triv_cuts

    if not self.__silence:

        tv = allright('True') if self.__triv_vars else attention('False')
        ts = allright('True') if self.__triv_sel else attention('False')
        tc = allright('True') if triv_cuts else attention('False')
        self.__logger.info("Suitable for fast processing: variables:%s, selection:%s, py-cuts:%s" % (tv, ts, tc))

        ## column widths: at least as wide as the column title plus two
        nl = max([len('Variable') + 2] + [len(v.name) for v in self.__variables])
        dl = max([len('Description') + 2] + [len(v.description) for v in self.__variables])

        ## the min/max column in `line2` is 11 + 3 + 11 = 25 characters wide
        ## (27 with the surrounding blanks, as in `sep`): pad the title accordingly
        minmax = 'min / max'.center(25)

        line1 = '\n# | %%%ds | %%-%ds | %s | Trivial? | ' % (nl, dl, minmax)
        line2 = '\n# | %%%ds | %%-%ds | %%+11.3g / %%-+11.3g | %%s | ' % (nl, dl)
        sep = '\n# +%s+%s+%s+%s+' % ((nl + 2) * '-', (dl + 2) * '-', 27 * '-', 10 * '-')

        the_line = 'Booked %d variables:' % len(self.variables)
        the_line += sep
        the_line += line1 % ('Variable', 'Description')
        the_line += sep
        for v in self.__variables:
            ## both alternatives occupy 8 visible characters
            if v.trivial:
                trivial = allright('True') + 4 * ' '
            else:
                trivial = attention('False') + 3 * ' '
            the_line += line2 % (v.name, v.description, v.minmax[0], v.minmax[1], trivial)
        the_line += sep
        self.__logger.info(the_line)

    ## Book dataset
    self.__data = ROOT.RooDataSet(
        ##
        self.name, fullname,
        ##
        self.__varset)

    #
    ## it is still very puzzling for me: should this line be here at all??
    ROOT.SetOwnership(self.__data, False)

    self.__progress = None
    from collections import defaultdict
    self.__skip = defaultdict(int)   ## per-variable counters, reported by Terminate
    self.__notifier = None
    self.__stat = [0, 0, 0]
def Terminate(self):
    """Finish the processing: report the outcome and release the variable containers.

    - ends the progress indicator, if there is one;
    - if `GetAbort()` is non-zero: logs a fatal message, drops the dataset and
      the variable containers, and returns immediately;
    - otherwise stores `self.event()` as the total number of input events and,
      unless silenced, logs a summary line and a per-variable table
      (mean/rms, min/max and the per-variable skip counter);
    - warns if the resulting dataset is empty;
    - finally replaces the variable set and the variable list by empty tuples.

    (Presumably this is the TSelector `Terminate` hook - the base class
    is not visible here.)
    """
    #
    if self.__progress:
        self.__progress.end()
    #
    ## Aborted?
    if 0 != self.GetAbort():
        self.__logger.fatal('Selector(%s): process has been aborted!' % self.__name)

        self.__data = None
        del self.__varset
        del self.__variables
        self.__varset = ()
        self.__variables = ()

        return  ## RETURN

    ##get total number of input events from base class
    self.__stat[0] = self.event()

    if not self.__silence:
        skipped = 'Skipped:%d' % self.skipped
        skipped = '/' + attention(skipped) if self.skipped else ''
        paint = allright if self.trivial_sel else attention
        cuts = paint('"%s"' % self.cuts())
        self.__logger.info(
            'Selector(%s): Events Total:%d/Processed:%d%s CUTS: %s' % (
                self.__name,
                self.total,
                self.processed,
                skipped,
                cuts))
        self.__logger.info('Selector(%s): dataset created:%s' % (self.__name, self.__data))

    if self.__data and not self.__silence:

        ## one row per variable:
        ## (name, description, mean, rms, min, max, skip-counter)
        rows = []
        for v in self.__variables:
            stat = self.__data.statVar(v.name)
            mnmx = stat.minmax()
            mean = stat.mean()
            rms = stat.rms()
            nskip = self.__skip[v.name]
            rows.append((v.name,                            ## 0
                         v.description,                     ## 1
                         ('%+.5g' % mean.value()).strip(),  ## 2
                         ('%.5g' % rms).strip(),            ## 3
                         ('%+.5g' % mnmx[0]).strip(),       ## 4
                         ('%+.5g' % mnmx[1]).strip(),       ## 5
                         '%-d' % nskip if nskip else ''))   ## 6
        rows.sort()

        ## column widths: start from the title widths and grow to fit the content
        titles = ('Variable', 'Description', 'mean', 'rms', 'min', 'max')
        widths = [len(t) + 2 for t in titles] + [len('Skip')]
        for row in rows:
            widths = [max(w, len(cell)) for w, cell in zip(widths, row)]
        name_l, desc_l, mean_l, rms_l, min_l, max_l, skip_l = widths

        sep = '# -%s+%s+%s+%s+%s-' % ((name_l + 2) * '-',
                                      (desc_l + 2) * '-',
                                      (mean_l + rms_l + 5) * '-',
                                      (min_l + max_l + 5) * '-',
                                      (skip_l + 2) * '-')
        ## the leading '#   ' puts each '|' under the matching '+' of `sep`
        fmt = '#   %%%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds | %%-%ds ' % (
            name_l, desc_l, mean_l, rms_l, min_l, max_l, skip_l)

        report = 'Dataset(%s) created:' % self.__name
        report += ' ' + allright('%s entries, %s variables' % (len(self.__data), len(self.variables)))
        if self.trivial_vars:
            report += ' Vars:' + allright('trivial') + ';'
        else:
            report += ' Vars:' + attention('non-trivial') + ';'
        if self.trivial_sel:
            report += ' Cuts:' + allright('trivial') + ';'
        else:
            report += ' Cuts:' + attention('non-trivial') + ';'
        if self.__cuts:
            report += ' ' + attention('with py-cuts')
        else:
            report += ' ' + allright('no py-cuts')

        header = fmt % ('Variable', 'Description', 'mean', 'rms', 'min', 'max', 'skip')

        table = [report, sep, header, sep]
        for row in rows:
            table.append(fmt % (row[0], row[1], row[2], row[3], row[4], row[5], attention(row[6])))
        table.append(sep)
        self.__logger.info('\n'.join(table))

    if not len(self.__data):
        ## `values()` exists in both Python 2 and Python 3,
        ## while `iteritems()` is available only in Python 2
        skip = sum(self.__skip.values())
        self.__logger.warning("Selector(%s): empty dataset! Total:%s/Processed:%s/Skipped:%d"
                              % (self.__name, self.total, self.processed, skip))

    ## attention: delete these
    del self.__varset
    del self.__variables
    self.__varset = ()
    self.__variables = ()
def _ds_table_0_(dataset, variables=[], cuts='', first=0, last=2**62):
    """Build a text table with per-variable statistics for the dataset.

    Parameters:
    - dataset   : the dataset to be summarized
    - variables : variables to show: a sequence of names, or a string with names
                  separated by blanks, commas or semicolons; a single name that
                  is not in the dataset is passed to `dataset.branches`;
                  when empty, all variables are shown
    - cuts      : selection passed to `statVar`
    - first     : first entry to process, passed to `statVar`
    - last      : last entry to process, passed to `statVar`

    Returns the tuple `(report, width)`, where `width` is the length of the
    table separator line, or 120 when there are no variables to show.
    For a dataset with an invalid variable set an error is logged and
    an empty string is returned.

    For a weighted non-binned dataset the function tries to find the weight
    variable and, on success, appends a highlighted row with its statistics.
    """
    varset = dataset.get()
    if not valid_pointer(varset):
        logger.error('Invalid dataset')
        ## NOTE(review): a bare string, while all other exits return a 2-tuple;
        ## kept as it is since the callers are not visible here - TODO confirm
        return ''

    if isinstance(variables, str):
        variables = variables.strip()
        variables = variables.replace(',', ' ')
        variables = variables.replace(';', ' ')
        variables = variables.split()

    if 1 == len(variables):
        variables = variables[0]

    if isinstance(variables, str):
        if variables in varset:
            names = [variables]
        else:
            names = list(dataset.branches(variables))
    elif variables:
        names = [i.GetName() for i in varset if i in variables]
    else:
        names = [i.GetName() for i in varset]

    def _row_(name, title, stat):
        """Format one table row from a name, a title and the statistics object"""
        mnmx = stat.minmax()
        mean = stat.mean()
        rms = stat.rms()
        return (name,                              ## 0
                title,                             ## 1
                ('%+.5g' % mean.value()).strip(),  ## 2
                ('%.5g' % rms).strip(),            ## 3
                ('%+.5g' % mnmx[0]).strip(),       ## 4
                ('%+.5g' % mnmx[1]).strip())       ## 5

    #
    rows = []
    for name in names:
        var = getattr(varset, name)
        stat = dataset.statVar(name, cuts, first, last)
        rows.append(_row_(var.GetName(), var.GetTitle(), stat))
    rows.sort()

    report = '# %s("%s","%s"):' % (dataset.__class__.__name__,
                                   dataset.GetName(),
                                   dataset.GetTitle())
    report += allright('%d entries, %d variables' % (len(dataset), len(varset)))

    if not rows:
        return report, 120

    weight = None
    if isinstance(dataset, ROOT.RooDataHist):
        if dataset.isNonPoissonWeighted():
            report += attention(' Binned/Weighted')
        else:
            report += allright(' Binned')
    elif dataset.isWeighted():

        if dataset.isNonPoissonWeighted():
            report += attention(' Weighted')
        else:
            report += attention(' Weighted(Poisson)')

        dstmp = None
        wvar = None

        ## 1) try to get the name of the weight variable
        store = dataset.store()
        if not valid_pointer(store):
            store = None

        if store and not isinstance(store, ROOT.RooTreeDataStore):
            dstmp = dataset.emptyClone()
            dstmp.convertToTreeStore()
            store = dstmp.store()
            if not valid_pointer(store):
                store = None

        if store and hasattr(store, 'tree') and valid_pointer(store.tree()):
            tree = store.tree()
            ## a single tree branch that is not a dataset variable is taken as the weight
            extra = set(tree.branches()) - set(i.GetName() for i in varset)
            if 1 == len(extra):
                wvar = extra.pop()

        if not wvar:
            wvar = Ostap.Utils.getWeight(dataset)
        if wvar:
            report += attention(' with "%s"' % wvar)

        store = None
        ## explicit None-test: the temporary dataset is an empty clone,
        ## so its truth value must not decide whether it is cleaned up
        if dstmp is not None:
            dstmp.reset()
            del dstmp
            dstmp = None

        ## 2) if weight name is known, try to get information about the weight
        if wvar:

            store = dataset.store()
            if not valid_pointer(store):
                store = None

            if store and not isinstance(store, ROOT.RooTreeDataStore):

                rargs = ROOT.RooFit.EventRange(first, last),
                if cuts:
                    ## need all variables
                    dstmp = dataset.reduce(ROOT.RooFit.Cut(cuts), *rargs)
                else:
                    ## enough to keep only 1 variable
                    vvs = ROOT.RooArgSet(varset[names[0]])
                    dstmp = dataset.reduce(ROOT.RooFit.SelectVars(vvs), *rargs)

                dstmp.convertToTreeStore()
                store = dstmp.store()
                ## cuts and range are already applied by `reduce`
                cuts, first, last = '', 0, 2**62

            if hasattr(store, 'tree') and valid_pointer(store.tree()):
                tree = store.tree()
                if wvar in tree.branches():
                    stat = tree.statVar(wvar, cuts, first, last)  ## no cuts here...
                    weight = '*%s*' % wvar
                    ## appended after sorting: the weight row is always the last one
                    rows.append(_row_(weight, 'Weight variable', stat))

            store = None
            if dstmp is not None:
                dstmp.reset()
                del dstmp
                dstmp = None

    # ==============================================================================================
    # build the actual table
    # ==============================================================================================

    ## column widths: start from the title widths and grow to fit the content
    titles = ('Variable', 'Description', 'mean', 'rms', 'min', 'max')
    widths = [len(t) + 2 for t in titles]
    for row in rows:
        widths = [max(w, len(cell)) for w, cell in zip(widths, row)]
    name_l, desc_l, mean_l, rms_l, min_l, max_l = widths

    sep = '# +%s+%s+%s+%s+' % ((name_l + 2) * '-',
                               (desc_l + 2) * '-',
                               (mean_l + rms_l + 5) * '-',
                               (min_l + max_l + 5) * '-')
    fmt = '# | %%-%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds |' % (
        name_l, desc_l, mean_l, rms_l, min_l, max_l)

    header = fmt % titles

    table = [report, sep, header, sep]

    ## regular variables first; the weight row (if any) goes into a separate section
    regular = rows[:-1] if weight else rows
    for row in regular:
        table.append(fmt % row)
    table.append(sep)

    if weight:
        line = fmt % rows[-1]
        table.append(line.replace(weight, attention(weight)))
        table.append(sep)

    return '\n'.join(table), len(sep)