def connect_by_pck_(self):
    '''Attach file lists to the MC components, using cached dataset pickles.

    For every (name, component) pair in ``self.mc_dict`` whose name is one
    of the alias names in ``self.aliases`` (a trailing run of digits is
    stripped first, except for names containing 'QCD'), look for a cached
    ``.pck`` file under ``<self.homedir>/.cmgdataset``:

    * no match    -> fall back to ``connect`` for this single component;
    * one match   -> unpickle it and set the component's ``files`` from
                     ``getFiles``;
    * many matches -> print them and raise.

    Reads ``self.aliases``, ``self.mc_dict``, ``self.tier``,
    ``self.pattern``, ``self.homedir``, ``self.cache`` and ``self.verbose``.

    Raises:
        RuntimeError: when more than one cached pickle matches a sample.
            (The original code used a bare ``raise`` here, which has no
            active exception to re-raise.)

    Prints use the single-argument ``print(...)`` form, which produces the
    same output under the Python 2 print statement.
    '''
    # alias name -> dataset path, i.e. the inverse of self.aliases
    path_by_alias = dict(zip(self.aliases.values(), self.aliases.keys()))
    numbered = re.compile(r'(?P<sample>[a-zA-Z0-9_]+[a-zA-Z])(?:[0-9]+)$')

    # Loop-invariant pieces of the glob pattern.
    tier_token = self.tier.replace('%', '')
    cache_dir = '/'.join([self.homedir, '.cmgdataset'])

    for alias_k, alias_v in self.mc_dict.items():
        # Strip a trailing number from the name, but leave QCD names as is.
        m = numbered.match(alias_k)
        if m and 'QCD' not in alias_k:
            alias_k = m.group('sample')
        if alias_k not in path_by_alias:
            continue

        # Glob pattern: *<dataset path without '/', '.' -> '*'>*<tier>*<pattern>.pck
        sample_pck = '*'.join([
            '',
            path_by_alias[alias_k].replace('/', '').replace('.', '*'),
            tier_token,
            self.pattern + '.pck',
        ])
        cached_sample = glob.glob('/'.join([cache_dir, sample_pck]))

        if not cached_sample:
            print('sample not cached yet, connecting to the DB')
            from CMGTools.RootTools.utils.connect import connect
            connect([alias_v], self.tier, self.pattern, self.aliases,
                    cache=self.cache, verbose=self.verbose)
        elif len(cached_sample) > 1:
            print('better specify which sample, many found')
            print(cached_sample)
            raise RuntimeError(
                'several cached pickles match sample %s: %s'
                % (alias_k, cached_sample))
        else:
            # Imported only where needed, like connect above.
            from CMGTools.RootTools.utils.getFiles import getFiles
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; the cache directory is assumed to be trusted - confirm.
            with open(cached_sample[0], 'rb') as pck_file:
                mycomp = pickle.load(pck_file)
            # Keep only the part of lfnDir that follows the 'CMG' element.
            lfn_parts = mycomp.lfnDir.split('/')
            dataset_path = '/'.join(
                [''] + lfn_parts[lfn_parts.index('CMG') + 1:])
            alias_v.files = getFiles(dataset_path, mycomp.user,
                                     self.pattern, useCache=self.cache)
            print('attached files to %s' % (alias_v.name))
            if alias_v.files:
                print('files %s' % (
                    '/'.join(alias_v.files[0].split('/')[:-1]
                             + [self.pattern])))
            else:
                # The original indexed files[0] unconditionally and died
                # with an IndexError when nothing matched.
                print('WARNING: no files matching %s found for %s'
                      % (self.pattern, alias_v.name))
def connect_by_pck_(self):
    '''Attach file lists to the MC components, using cached dataset pickles.

    For every (name, component) pair in ``self.mc_dict`` whose name is one
    of the alias names in ``self.aliases`` (a trailing run of digits is
    stripped first, except for names containing 'QCD'), look for a cached
    ``.pck`` file under ``<self.homedir>/.cmgdataset``:

    * no match    -> fall back to ``connect`` for this single component;
    * one match   -> unpickle it and set the component's ``files`` from
                     ``getFiles``;
    * many matches -> print them and raise.

    Reads ``self.aliases``, ``self.mc_dict``, ``self.tier``,
    ``self.pattern``, ``self.homedir``, ``self.cache`` and ``self.verbose``.

    Raises:
        RuntimeError: when more than one cached pickle matches a sample.
            (The original code used a bare ``raise`` here, which has no
            active exception to re-raise.)

    Prints use the single-argument ``print(...)`` form, which produces the
    same output under the Python 2 print statement.
    '''
    # alias name -> dataset path, i.e. the inverse of self.aliases
    path_by_alias = dict(zip(self.aliases.values(), self.aliases.keys()))
    numbered = re.compile(r'(?P<sample>[a-zA-Z0-9_]+[a-zA-Z])(?:[0-9]+)$')

    # Loop-invariant pieces of the glob pattern.
    tier_token = self.tier.replace('%', '')
    cache_dir = '/'.join([self.homedir, '.cmgdataset'])

    for alias_k, alias_v in self.mc_dict.items():
        # Strip a trailing number from the name, but leave QCD names as is.
        m = numbered.match(alias_k)
        if m and 'QCD' not in alias_k:
            alias_k = m.group('sample')
        if alias_k not in path_by_alias:
            continue

        # Glob pattern: *<dataset path without '/', '.' -> '*'>*<tier>*<pattern>.pck
        sample_pck = '*'.join([
            '',
            path_by_alias[alias_k].replace('/', '').replace('.', '*'),
            tier_token,
            self.pattern + '.pck',
        ])
        cached_sample = glob.glob('/'.join([cache_dir, sample_pck]))

        if not cached_sample:
            print('sample not cached yet, connecting to the DB')
            from CMGTools.RootTools.utils.connect import connect
            connect([alias_v], self.tier, self.pattern, self.aliases,
                    cache=self.cache, verbose=self.verbose)
        elif len(cached_sample) > 1:
            print('better specify which sample, many found')
            print(cached_sample)
            raise RuntimeError(
                'several cached pickles match sample %s: %s'
                % (alias_k, cached_sample))
        else:
            # Imported only where needed, like connect above.
            from CMGTools.RootTools.utils.getFiles import getFiles
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; the cache directory is assumed to be trusted - confirm.
            with open(cached_sample[0], 'rb') as pck_file:
                mycomp = pickle.load(pck_file)
            # Keep only the part of lfnDir that follows the 'CMG' element.
            lfn_parts = mycomp.lfnDir.split('/')
            dataset_path = '/'.join(
                [''] + lfn_parts[lfn_parts.index('CMG') + 1:])
            alias_v.files = getFiles(dataset_path, mycomp.user,
                                     self.pattern, useCache=self.cache)
            print('attached files to %s' % (alias_v.name))
            if alias_v.files:
                print('files %s' % (
                    '/'.join(alias_v.files[0].split('/')[:-1]
                             + [self.pattern])))
            else:
                # The original indexed files[0] unconditionally and died
                # with an IndexError when nothing matched.
                print('WARNING: no files matching %s found for %s'
                      % (self.pattern, alias_v.name))
def connectSample(components, row, filePattern, aliases, cache, verbose):
    '''Fill the component matching a DB row with bookkeeping info and files.

    Args:
        components: iterable of components; the one whose ``name`` equals
            the alias found for the row's path name is updated in place.
        row: sequence where ``row[0]`` is the id handed to
            ``findFirstAncestor``, ``row[1]`` the dataset path name and
            ``row[2]`` the file owner handed to ``getFiles``.
        filePattern: file pattern forwarded to ``getFiles``.
        aliases: alias table forwarded to ``findAlias``.
        cache: forwarded to ``getFiles``.
        verbose: when true, pretty-print the processed dataset info.

    Returns:
        False when no alias is found, or when zero or several components
        match the alias; None (implicitly) once the component is filled.

    Raises:
        TypeError: re-raised, after dumping the dataset info, when a step
            efficiency cannot be multiplied into the global efficiency.

    On the component this sets ``dataset_entries``, ``nGenEvents`` and
    ``files``. Prints use the single-argument ``print(...)`` form, which
    produces the same output under the Python 2 print statement.
    '''
    datasetId = row[0]
    path_name = row[1]
    file_owner = row[2]

    compName = findAlias(path_name, aliases)
    if compName is None:
        print('WARNING: cannot find alias for %s' % (path_name,))
        return False

    info = []
    findFirstAncestor(datasetId, info)
    dsInfo = processInfo(info)
    if verbose:
        pprint.pprint(dsInfo)
    # From here on use the path name of the first entry of the dataset info.
    path_name = dsInfo[0]['path_name']
    nEvents = dsInfo.primary_dataset_entries
    taskurl = 'https://savannah.cern.ch/task/?{task_id}'.format(
        task_id=dsInfo[0]['task_id'])

    # Product of the per-step efficiencies.
    globalEff = 1.
    for step in dsInfo:
        stepName = step['step']
        if stepName == 'PFAOD':
            eff = 1.0  # not to double count with PATCMG
        elif stepName == 'PATCMG':
            eff = step['fraction']
            if eff is None:
                eff = step['jobeff']
        else:
            # TAUTAU, MERGE and every other step use the job efficiency.
            eff = step['jobeff']
        if eff is None:
            print('WARNING: efficiency not determined for %s' % (compName,))
            eff = 0.0
        try:
            globalEff *= eff
        except TypeError:
            pprint.pprint(dsInfo)
            raise

    comps = [comp for comp in components if comp.name == compName]
    if len(comps) > 1:
        print('WARNING find several components for compName %s'
              % (compName,))
        print([str(comp) for comp in comps])
        return False
    if not comps:
        print('WARNING no component found for compName %s' % (compName,))
        return False

    comp = comps[0]
    comp.dataset_entries = dsInfo.dataset_entries
    isData = comp.name.startswith('data_')
    if isData or comp.name.startswith('embed_'):
        comp.nGenEvents = 0
        # Only the LOADING line shows the -1 sentinel. The original also
        # overwrote globalEff with it, which made the completeness checks
        # below fire unconditionally for data_/embed_ components.
        shownEff = -1.
    else:
        comp.nGenEvents = nEvents
        if comp.nGenEvents is None:
            print('WARNING: nGenEvents is None, setting it to 1.')
            comp.nGenEvents = 1.
        if comp.nGenEvents != 1.:
            comp.nGenEvents *= globalEff
        shownEff = globalEff

    print('LOADING: %s %s %s %s %s'
          % (comp.name, path_name, nEvents, shownEff, taskurl))
    comp.files = getFiles(path_name, file_owner, filePattern, cache)

    if isData:
        if globalEff < 0.99:
            print('ARGH! data sample is not complete. %s' % (taskurl,))
            print(dsInfo)
    elif globalEff < 0.9:
        print('WEIRD! Efficiency is way too low ({globalEff})! you might have to edit your cfg manually.'.format(
            globalEff=globalEff))
        print(dsInfo)
## deltaEta = 3.5, ## cjvPtCut = 30., ## ) treeProducer = cfg.Analyzer( 'PFTreeProducer' ) ############################################################################### from CMGTools.ZJetsTutorial.samples.run2012.ewk import DYJets from CMGTools.RootTools.utils.getFiles import getFiles DYJets.files = getFiles('/DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball/Summer12_DR53X-PU_S10_START53_V7A-v1/AODSIM/V5_B/PAT_CMG_V5_16_0', 'cmgtools', '.*root') ############################################################################### MC_list = [DYJets] allsamples = MC_list ## allsamples = [DYJets] ## for c in allsamples: ## c.triggers = [ ## 'HLT_Mu17_Mu8_v16', ## 'HLT_Mu17_Mu8_v17', ## 'HLT_Mu17_Mu8_v18', ## 'HLT_Mu17_Mu8_v19',
# NOTE(review): cfg and getFiles are not imported explicitly here;
# presumably they come from this star import - confirm.
from CMGTools.RootTools.RootTools import *

# Analyzer configuration for 'WNJetsAnalyzer', with verbose switched off.
WNJetsAna = cfg.Analyzer(
    'WNJetsAnalyzer',
    verbose = False
    )

#########################################################################################

# MC component named 'WJets'. Its file list is resolved when this module is
# executed, by calling getFiles with the dataset path, the 'cmgtools' owner
# and the 'cmgTuple.*root' file pattern.
WJets = cfg.MCComponent(
    name = 'WJets',
    files = getFiles('/WJetsToLNu_TuneZ2_7TeV-madgraph-tauola/Fall11-PU_S6_START42_V14B-v1/AODSIM/V5_B/PAT_CMG_V5_6_0_B', 'cmgtools', 'cmgTuple.*root'),
    xSection = 31314.,
    nGenEvents = 1,
    triggers = [],
    effCorrFactor = 1 )

#########################################################################################

# WJets is the only selected component.
selectedComponents = [WJets]

# The sequence contains the single analyzer defined above.
sequence = cfg.Sequence( [
    WNJetsAna,
    ] )
def connectSample(components, row, filePattern, aliases, cache, verbose):
    '''Fill the component matching a DB row with bookkeeping info and files.

    Args:
        components: iterable of components; the one whose ``name`` equals
            the alias found for the row's path name is updated in place.
        row: sequence where ``row[0]`` is the id handed to
            ``findFirstAncestor``, ``row[1]`` the dataset path name and
            ``row[2]`` the file owner handed to ``getFiles``.
        filePattern: file pattern forwarded to ``getFiles``.
        aliases: alias table forwarded to ``findAlias``.
        cache: forwarded to ``getFiles``.
        verbose: when true, pretty-print the processed dataset info.

    Returns:
        False when no alias is found, or when zero or several components
        match the alias; None (implicitly) once the component is filled.

    Raises:
        TypeError: re-raised, after dumping the dataset info, when a step
            efficiency cannot be multiplied into the global efficiency.

    On the component this sets ``dataset_entries``, ``nGenEvents`` and
    ``files``. Prints use the single-argument ``print(...)`` form, which
    produces the same output under the Python 2 print statement.
    '''
    datasetId = row[0]
    path_name = row[1]
    file_owner = row[2]

    compName = findAlias(path_name, aliases)
    if compName is None:
        print('WARNING: cannot find alias for %s' % (path_name,))
        return False

    info = []
    findFirstAncestor(datasetId, info)
    dsInfo = processInfo(info)
    if verbose:
        pprint.pprint(dsInfo)
    # From here on use the path name of the first entry of the dataset info.
    path_name = dsInfo[0]['path_name']
    nEvents = dsInfo.primary_dataset_entries
    taskurl = 'https://savannah.cern.ch/task/?{task_id}'.format(
        task_id=dsInfo[0]['task_id'])

    # Product of the per-step efficiencies.
    globalEff = 1.
    for step in dsInfo:
        stepName = step['step']
        if stepName == 'PFAOD':
            eff = 1.0  # not to double count with PATCMG
        elif stepName == 'PATCMG':
            eff = step['fraction']
            if eff is None:
                eff = step['jobeff']
        else:
            # TAUTAU, MERGE and every other step use the job efficiency.
            eff = step['jobeff']
        if eff is None:
            print('WARNING: efficiency not determined for %s' % (compName,))
            eff = 0.0
        try:
            globalEff *= eff
        except TypeError:
            pprint.pprint(dsInfo)
            raise

    comps = [comp for comp in components if comp.name == compName]
    if len(comps) > 1:
        print('WARNING find several components for compName %s'
              % (compName,))
        print([str(comp) for comp in comps])
        return False
    if not comps:
        print('WARNING no component found for compName %s' % (compName,))
        return False

    comp = comps[0]
    comp.dataset_entries = dsInfo.dataset_entries
    isData = comp.name.startswith('data_')
    if isData or comp.name.startswith('embed_'):
        comp.nGenEvents = 0
        # Only the LOADING line shows the -1 sentinel. The original also
        # overwrote globalEff with it, which made the completeness checks
        # below fire unconditionally for data_/embed_ components.
        shownEff = -1.
    else:
        comp.nGenEvents = nEvents
        if comp.nGenEvents is None:
            print('WARNING: nGenEvents is None, setting it to 1.')
            comp.nGenEvents = 1.
        if comp.nGenEvents != 1.:
            comp.nGenEvents *= globalEff
        shownEff = globalEff

    print('LOADING: %s %s %s %s %s'
          % (comp.name, path_name, nEvents, shownEff, taskurl))
    comp.files = getFiles(path_name, file_owner, filePattern, cache)

    if isData:
        if globalEff < 0.99:
            print('ARGH! data sample is not complete. %s' % (taskurl,))
            print(dsInfo)
    elif globalEff < 0.9:
        print('WEIRD! Efficiency is way too low ({globalEff})! you might have to edit your cfg manually.'.format(
            globalEff=globalEff))
        print(dsInfo)