def getfiles(self, refresh=False, url=True, verb=0):
    """Get list of files from DAS."""
    files = self.files
    if self.refreshable and (not files or refresh):
        files = []
        for path in self.paths:
            if self.storage:  # get files from storage system
                sepath = repkey(self.storage, PATH=path).replace('//', '/')
                storage = getstorage(sepath, verb=verb - 1)
                outlist = storage.getfiles(url=url, verb=verb - 1)
            else:  # get files from DAS
                dascmd = 'dasgoclient --query="file dataset=%s instance=%s"' % (path, self.instance)  #--limit=0
                LOG.verb(repr(dascmd), verb)
                cmdout = execute(dascmd, verb=verb - 1)
                outlist = cmdout.split(os.linesep)
            for line in outlist:  # filter root files
                line = line.strip()
                if line.endswith('.root') and not any(f.endswith(line) for f in self.blacklist):
                    if url and self.url not in line and 'root://' not in line:
                        line = self.url + line
                    files.append(line)
        files.sort()  # for consistent list order
        self.files = files
    return files
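# A self-contained sketch of the filtering applied above: keep only .root
# files, drop blacklisted ones, and prepend the redirector URL when it is
# missing. All sample values below are made up for illustration.
url = "root://cms-xrd-global.cern.ch/"
blacklist = ["/store/bad/nano_3.root"]
outlist = ["/store/good/nano_1.root", "/store/bad/nano_3.root", "log.txt"]
files = []
for line in outlist:
    line = line.strip()
    if line.endswith('.root') and not any(f.endswith(line) for f in blacklist):
        if url not in line and 'root://' not in line:
            line = url + line
        files.append(line)
print files  # ['root://cms-xrd-global.cern.ch//store/good/nano_1.root']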
def _getnevents(self, das=True, refresh=False, tree='Events', limit=-1, checkfiles=False, verb=0):
    """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False)."""
    nevents = self.nevents
    filenevts = self.filenevts
    treename = tree
    if nevents <= 0 or refresh:
        if checkfiles or (self.storage and not das):  # get number of events from storage system
            files = self.getfiles(url=True, das=das, refresh=refresh, limit=limit, verb=verb)
            for fname in files:
                nevts = getnevents(fname, treename)
                filenevts[fname] = nevts  # cache
                nevents += nevts
                LOG.verb("_getnevents: Found %d events in %r." % (nevts, fname), verb, 3)
        else:  # get number of events from DAS
            for daspath in self.paths:
                nevents += getdasnevents(daspath, instance=self.instance, verb=verb - 1)
        if limit < 0:
            self.nevents = nevents
    return nevents, filenevts
def loadjson(cfgname):
  """Initialize sample from job config JSON file."""
  if cfgname.endswith(".json.gz"):
    with gzip.open(cfgname,'rt') as file:
      data = file.read().strip()
      jobcfg = json.loads(data)
  else:
    with open(cfgname,'r') as file:
      jobcfg = json.load(file)
  for key, value in jobcfg.items():
    if isinstance(value,unicode):
      jobcfg[key] = str(value)
  for key in ['group','name','paths','try','channel','chunkdict','dtype','extraopts']:
    LOG.insist(key in jobcfg,"Did not find key '%s' in job configuration %s"%(key,cfgname))
  jobcfg['config'] = str(cfgname)
  jobcfg['chunkdict'] = { int(k): v for k, v in jobcfg['chunkdict'].iteritems() }
  nfilesperjob = int(jobcfg['nfilesperjob'])
  filenevts = jobcfg.get('filenevts',{ })
  dtype     = jobcfg['dtype']
  channels  = [jobcfg['channel']]
  opts      = [str(s) for s in jobcfg['extraopts']]
  subtry    = int(jobcfg['try'])
  nevents   = int(jobcfg['nevents'])
  sample = Sample(jobcfg['group'],jobcfg['name'],jobcfg['paths'],dtype=dtype,channels=channels,
                  subtry=subtry,jobcfg=jobcfg,nfilesperjob=nfilesperjob,filenevts=filenevts,
                  nevents=nevents,opts=opts)
  return sample
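# Self-contained sketch of the two normalization steps above: in Python 2,
# JSON decoding yields unicode strings, and chunkdict keys come back as
# strings, so both are converted. The sample data below is made up.
import json
data = json.loads('{"name": "DYJetsToLL_M-50", "chunkdict": {"0": ["nano_1.root"]}}')
for key, value in data.items():
  if isinstance(value,unicode):
    data[key] = str(value)
chunkdict = { int(k): v for k, v in data['chunkdict'].iteritems() }
print chunkdict # {0: [u'nano_1.root']}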
def writefiles(self, listname, **kwargs):
    """Write filenames to text file for fast look up in future."""
    writeevts = kwargs.pop('nevts', False)  # also write nevents to file
    listname = repkey(listname, ERA=self.era, GROUP=self.group, SAMPLE=self.name)
    print ">>> Write list to %r..." % (listname)
    ensuredir(os.path.dirname(listname))
    filenevts = self.getfilenevts(checkfiles=True, **kwargs) if writeevts else None
    treename = kwargs.pop('tree', 'Events')
    files = self.getfiles(**kwargs)
    with open(listname, 'w+') as lfile:
        for infile in files:
            if writeevts:
                nevts = filenevts.get(infile, -1)
                if nevts < 0:
                    LOG.warning("Did not find nevents of %s. Trying again..." % (infile))
                    nevts = getnevents(infile, treename)
                infile = "%s:%d" % (infile, nevts)  # write $FILENAME(:NEVTS)
            lfile.write(infile + '\n')
def dasgoclient(query, **kwargs):
    """Helper function to call dasgoclient and retrieve data set information."""
    try:
        verbosity = kwargs.get('verb', 0)
        instance = kwargs.get('instance', "")
        limit = kwargs.get('limit', 0)
        option = kwargs.get('opts', "")
        if instance:
            query += " instance=%s" % (instance)
        dascmd = 'dasgoclient --query="%s"' % (query)
        if limit > 0:
            dascmd += " --limit=%d" % (limit)
        if option:
            dascmd += " " + option.strip()
        LOG.verb(repr(dascmd), verbosity)
        cmdout = execute(dascmd, verb=verbosity - 1)
    except CalledProcessError as e:
        print
        LOG.error("Failed to call 'dasgoclient' command. Please make sure:\n"
                  "  1) 'dasgoclient' command exists.\n"
                  "  2) You have a valid VOMS proxy. Use 'voms-proxy-init -voms cms -valid 200:0' or 'source utils/setupVOMS.sh'.\n"
                  "  3) The DAS dataset in '%s' exists!\n" % (dascmd))
        raise e
    return cmdout
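# Usage sketch for dasgoclient, assuming a CMS environment that provides the
# dasgoclient executable and a valid VOMS proxy; the dataset path below is an
# example in the right format and may not exist.
#   cmdout = dasgoclient("file dataset=/DYJetsToLL_M-50/RunIIAutumn18NanoAODv6/NANOAODSIM",limit=3,verb=1)
#   for line in cmdout.split('\n'):
#     print line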
def ls(self, *paths, **kwargs):
    """List contents of given directory."""
    verb = kwargs.get('verb', self.verbosity)
    dryrun = kwargs.get('dry', False)
    here = kwargs.get('here', False)
    lscol = kwargs.get('lscol', self.lscol)
    filters = ensurelist(kwargs.get('filter', []))  # inclusive filters with glob patterns, like '*' or '[0-9]' wildcards
    path = self.expandpath(*paths, here=here)
    retlist = self.execute("%s %s%s" % (self.lscmd, self.lsurl, path), fatal=False, dry=dryrun, verb=verb)
    delim = '\r\n' if '\r\n' in retlist else '\n'
    retlist = retlist.split(delim)
    if isinstance(lscol, int):
        retlist = [l.split(' ')[lscol] for l in retlist]
    if retlist and 'No such file or directory' in retlist[0]:
        LOG.warning(retlist[0])
        retlist = []
    elif filters:
        for file in retlist[:]:
            if not any(fnmatch(file, f) for f in filters):
                retlist.remove(file)
    return retlist
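# Self-contained demo of the inclusive glob filtering used above: an entry is
# kept if it matches at least one pattern. Example filenames are made up.
from fnmatch import fnmatch
filters = ['*.root', 'nano_[0-9].txt']
retlist = ['nano_1.root', 'nano_2.txt', 'readme.md']
for file in retlist[:]:  # iterate over a copy while removing
    if not any(fnmatch(file, f) for f in filters):
        retlist.remove(file)
print retlist  # ['nano_1.root', 'nano_2.txt']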
def _writefile(ofile,fname,prefix=""): """Help function to write individual files.""" if writeevts: # add nevents at end of infile string nevts = filenevts.get(fname,-1) # retrieve from cache if nevts<0: LOG.warning("Did not find nevents of %s. Trying again..."%(fname)) nevts = getnevents(fname,treename) # get nevents from file fname = "%s:%d"%(fname,nevts) # write $FILENAM(:NEVTS) ofile.write(prefix+fname+'\n')
def file(self, *paths, **kwargs):
    """Ensure that a given file exists, and prepend a file URL if needed."""
    ensure = kwargs.get('ensure', False)
    path = self.expandpath(*paths, here=True)
    if path.startswith(self.parent):
        path = self.fileurl + path
    if ensure:
        if not self.exists(path):
            LOG.throw(IOError, "Did not find %s." % (path))
    return path
def getdasnevents(daspath, **kwargs):
    """Get number of events from DAS."""
    dascmd = "summary dataset=%s" % (daspath)
    cmdout = dasgoclient(dascmd, **kwargs)
    if "nevents" in cmdout:
        nevts = int(cmdout.split('"nevents":')[1].split(',')[0])
    else:
        nevts = 0
        LOG.warning("getdasnevents: Could not get number of events from DAS for %r." % (daspath))
    return nevts
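# Self-contained sketch of the string parsing above, applied to an example of
# the JSON-like summary dasgoclient prints (the numbers are made up).
cmdout = '[{"nevents":100194597,"nfiles":65,"nblocks":12}]'
if "nevents" in cmdout:
    nevts = int(cmdout.split('"nevents":')[1].split(',')[0])
else:
    nevts = 0
print nevts  # 100194597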
def writefiles(self,listname,**kwargs):
  """Write filenames to text file for fast look up in future.
  If there is more than one DAS dataset path, write lists separately for each path."""
  kwargs    = kwargs.copy() # do not edit given dictionary
  writeevts = kwargs.pop('nevts',False) # also write nevents to file
  listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  ensuredir(os.path.dirname(listname))
  filenevts = self.getfilenevts(checkfiles=True,**kwargs) if writeevts else None
  treename  = kwargs.pop('tree','Events') # do not pass to Sample.getfiles
  kwargs.pop('ncores',None) # do not pass to Sample.getfiles
  kwargs['refresh'] = False # already got file list in Sample.filenevts
  files = self.getfiles(**kwargs) # get right URL
  if not files:
    LOG.warning("writefiles: Did not find any files!")
  def _writefile(ofile,fname,prefix=""):
    """Helper function to write individual files."""
    if writeevts: # add nevents at the end of the infile string
      nevts = filenevts.get(fname,-1) # retrieve from cache
      if nevts<0:
        LOG.warning("Did not find nevents of %s. Trying again..."%(fname))
        nevts = getnevents(fname,treename) # get nevents from file
      fname = "%s:%d"%(fname,nevts) # write $FILENAME(:NEVTS)
    ofile.write(prefix+fname+'\n')
  paths = self.paths if '$PATH' in listname else [self.paths[0]]
  for path in paths:
    listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
    with open(listname_,'w+') as lfile:
      if '$PATH' in listname: # write only the file list of this path to this text file
        print ">>> Write %s files to list %r..."%(len(self.pathfiles[path]),listname_)
        for infile in self.pathfiles[path]:
          _writefile(lfile,infile)
      elif len(self.paths)<=1: # write file list for the only path
        if self.nevents>0:
          print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
        else:
          print ">>> Write %s files to list %r..."%(len(files),listname_)
        for infile in files:
          _writefile(lfile,infile)
      else: # divide up list per DAS dataset path
        if self.nevents>0:
          print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
        else:
          print ">>> Write %s files to list %r..."%(len(files),listname_)
        for i, path in enumerate(self.paths):
          print ">>> %3s files for %s..."%(len(self.pathfiles[path]),path)
          lfile.write("DASPATH=%s\n"%(path)) # write special line to text file, which loadfiles() can parse
          for infile in self.pathfiles[path]: # loop over this list (general list is sorted)
            LOG.insist(infile in files,"Did not find file %s in general list! %s"%(infile,files))
            _writefile(lfile,infile,prefix="  ")
          if i+1<len(self.paths): # add extra white line between blocks
            lfile.write("\n")
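# For reference, a hypothetical list file produced by writefiles with
# nevts=True for a sample with two DAS dataset paths would look like:
#
#   DASPATH=/DYJetsToLL_M-50/RunIIAutumn18NanoAODv6-v1/NANOAODSIM
#     root://cms-xrd-global.cern.ch//store/.../nano_1.root:495000
#     root://cms-xrd-global.cern.ch//store/.../nano_2.root:213000
#
#   DASPATH=/DYJetsToLL_M-50_ext1/RunIIAutumn18NanoAODv6-v1/NANOAODSIM
#     root://cms-xrd-global.cern.ch//store/.../nano_1.root:512000
#
# loadfiles() below parses the special DASPATH= lines to split the file list
# back up per dataset path.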
def ls(self,*paths,**kwargs):
  """List contents of given directory."""
  verb    = kwargs.get('verb',self.verbosity)
  dryrun  = kwargs.get('dry',False)
  filters = ensurelist(kwargs.get('filter',[ ])) # inclusive filters with glob patterns, like '*' or '[0-9]' wildcards
  path    = self.expandpath(*paths)
  retlist = self.execute("%s %s%s"%(self.lscmd,self.lsurl,path),fatal=False,dry=dryrun,verb=verb).split('\n')
  if retlist and 'No such file or directory' in retlist[0]:
    LOG.warning(retlist[0])
    retlist = [ ]
  elif filters:
    for file in retlist[:]:
      if not any(fnmatch(file,f) for f in filters):
        retlist.remove(file)
  return retlist
def getnevents(self, refresh=False, verb=0):
    """Get number of events from DAS."""
    nevents = self.nevents
    if nevents <= 0 or refresh:
        for path in self.paths:
            dascmd = 'dasgoclient --query="summary dataset=%s instance=%s"' % (path, self.instance)
            LOG.verb(repr(dascmd), verb)
            cmdout = execute(dascmd, verb=verb - 1)
            if "nevents" in cmdout:
                ndasevts = int(cmdout.split('"nevents":')[1].split(',')[0])
            else:
                ndasevts = 0  # avoid NameError below
                LOG.warning("Could not get number of events from DAS for %r." % (self.name))
            nevents += ndasevts
        self.nevents = nevents
    return nevents
def ls(self, *paths, **kwargs):
    """List contents of given directory."""
    verb = kwargs.get('verb', self.verbosity)
    dryrun = kwargs.get('dry', False)
    filter = kwargs.get('filter', None)  # filter with glob pattern, like '*' or '[0-9]' wildcards
    path = self.expandpath(*paths)
    retlist = self.execute("%s %s%s" % (self.lscmd, self.lsurl, path), fatal=False, dry=dryrun, verb=verb).split('\n')
    if retlist and 'No such file or directory' in retlist[0]:
        LOG.warning(retlist[0])
        retlist = []
    elif filter:
        for file in retlist[:]:
            if not fnmatch(file, filter):
                retlist.remove(file)
    return retlist
def getnevents(self, das=True, refresh=False, treename='Events', verb=0):
    """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False)."""
    nevents = self.nevents
    if nevents <= 0 or refresh:
        if self.storage and not das:  # get number of events from storage system
            files = self.getfiles(url=True, refresh=refresh, verb=verb)
            for fname in files:
                file = ensureTFile(fname)
                tree = file.Get(treename)
                if not tree:
                    LOG.warning("getnevents: No %r tree in %r!" % (treename, fname))
                    file.Close()
                    continue
                nevts = tree.GetEntries()
                file.Close()
                nevents += nevts
                LOG.verb("getnevents: Found %d events in %r." % (nevts, fname), verb, 3)
        else:  # get number of events from DAS
            for daspath in self.paths:
                cmdout = dasgoclient("summary dataset=%s instance=%s" % (daspath, self.instance), verb=verb - 1)
                if "nevents" in cmdout:
                    ndasevts = int(cmdout.split('"nevents":')[1].split(',')[0])
                else:
                    ndasevts = 0  # avoid NameError below
                    LOG.warning("Could not get number of events from DAS for %r." % (self.name))
                nevents += ndasevts
        self.nevents = nevents
    return nevents
def loadfiles(self, listname, **kwargs):
    """Load filenames from text file for fast look up in future."""
    listname = repkey(listname, ERA=self.era, GROUP=self.group, SAMPLE=self.name)
    filenevts = self.filenevts
    nevents = 0
    if self.verbosity >= 1:
        print ">>> Loading sample files from %r" % (listname)
    ensurefile(listname, fatal=True)
    filelist = []
    with open(listname, 'r') as file:
        for line in file:
            line = line.strip().split()  # split at spaces to allow comments at the end
            if not line:
                continue
            line = line[0].strip()  # remove spaces, consider only first part of the line
            if line[0] == '#':
                continue  # do not consider commented-out lines
            #if line.endswith('.root'):
            match = fevtsexp.match(line)  # match $FILENAME(:NEVTS)
            if not match:
                continue
            infile = match.group(1)
            if match.group(2):  # found nevents in filename
                nevts = int(match.group(2))
                filenevts[infile] = nevts  # store/cache in dictionary
                nevents += nevts
            filelist.append(infile)
    if self.nevents <= 0:
        self.nevents = nevents
    elif self.nevents != nevents:
        LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!" % (self.nevents, nevents))
        self.nevents = nevents
    self.files = filelist
    self.files.sort()
    return self.files
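# fevtsexp is not shown in this excerpt; a minimal sketch of a regular
# expression matching the $FILENAME(:NEVTS) format parsed above could be:
import re
fevtsexp = re.compile(r"(.+\.root)(?::(\d+))?$")  # hypothetical definition
match = fevtsexp.match("root://xrootd.example.com//store/nano_1.root:495000")
print match.group(1)  # root://xrootd.example.com//store/nano_1.root
print match.group(2)  # 495000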
def getfiles(self,das=False,refresh=False,url=True,limit=-1,verb=0):
  """Get list of files from storage system (default), or from DAS (if there is no storage system, or das=True)."""
  LOG.verb("getfiles: das=%r, refresh=%r, url=%r, limit=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
    das,refresh,url,limit,self.filelist,len(self.files),len(self.filenevts)),verb,1)
  if self.filelist and not self.files: # get file list from text file for the first time
    self.loadfiles(self.filelist)
  files = self.files # cache for efficiency
  url_  = self.dasurl if (das and self.storage) else self.url
  if self.refreshable and (not files or das or refresh): # (re)derive file list
    if not files or das:
      LOG.verb("getfiles: Retrieving files...",verb,2)
    else:
      LOG.verb("getfiles: Refreshing file list...",verb,2)
    files = [ ]
    for daspath in self.paths: # loop over DAS dataset paths
      self.pathfiles[daspath] = [ ]
      if (self.storage and not das) or (not self.instance): # get files from storage system
        postfix = self.postfix+'.root'
        sepath  = repkey(self.storepath,PATH=daspath,DAS=daspath).replace('//','/')
        outlist = self.storage.getfiles(sepath,url=url,verb=verb-1)
        if limit>0:
          outlist = outlist[:limit]
      else: # get files from DAS
        postfix = '.root'
        outlist = getdasfiles(daspath,instance=self.instance,limit=limit,verb=verb-1)
      for line in outlist: # filter root files
        line = line.strip()
        if line.endswith(postfix) and not any(f.endswith(line) for f in self.blacklist):
          if url and url_ not in line and 'root://' not in line:
            line = url_+line
          files.append(line)
          self.pathfiles[daspath].append(line)
      self.pathfiles[daspath].sort()
      if not self.pathfiles[daspath]:
        LOG.warning("getfiles: Did not find any files for %s"%(daspath))
    files.sort() # for consistent list order
    if not das or not self.storage:
      self.files = files # store cache for efficiency
  elif url and any(url_ not in f for f in files): # add URL if missing
    files = [(url_+f if url_ not in f else f) for f in files]
  elif not url and any(url_ in f for f in files): # remove URL
    files = [f.replace(url_,"") for f in files]
  return files[:] # pass copy to protect private self.files
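# Self-contained demo of the URL normalization in the cached branches above:
# prepend the redirector when url=True, strip it when url=False. The file
# names below are made up for illustration.
url_  = "root://cms-xrd-global.cern.ch/"
files = ["/store/a/nano_1.root", url_+"/store/a/nano_2.root"]
withurl = [(url_+f if url_ not in f else f) for f in files] # add URL if missing
nourl   = [f.replace(url_,"") for f in files]               # remove URL
print withurl # both entries now carry the redirector prefix
print nourl   # ['/store/a/nano_1.root', '/store/a/nano_2.root']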
def testStorage(path,verb=0):
  
  # INITIALIZE
  LOG.header("__init__")
  #storage = ensuremodule(system,"PicoProducer.storage")
  storage = getstorage(path,ensure=True,verb=verb)
  print ">>> %r"%(storage)
  print ">>> %-10s = %s"%('path',storage.path)
  print ">>> %-10s = %s"%('rmcmd',storage.rmcmd)
  print ">>> %-10s = %s"%('lscmd',storage.lscmd)
  print ">>> %-10s = %s"%('mkdrcmd',storage.mkdrcmd)
  print ">>> %-10s = %s"%('cpcmd',storage.cpcmd)
  print ">>> %-10s = %s"%('tmpdir',storage.tmpdir)
  print ">>> "
  
  # EXPAND PATH
  LOG.header("expandpath")
  pathargs = [
    ('test.py',),
    ('$PATH/test.py',),
    ('foo','bar',),
  ]
  pathkwargs = [
    {'here':True},
    {'here':False},
  ]
  for patharg in pathargs:
    for pathkwarg in pathkwargs:
      LOG.color("storage.expandpath(%s,%s)"%(','.join(repr(a) for a in patharg),
                ','.join("%s=%r"%(k,v) for k,v in pathkwarg.iteritems())))
      result = storage.expandpath(*patharg,**pathkwarg)
      print ">>> %r"%(result)
  
  # LS
  LOG.header("ls")
  LOG.color("storage.ls(verb=%d)"%(verb))
  storage.ls(verb=verb)
  
  # CP
  LOG.header("cp")
  fname = createdummy("testStorage.txt")
  LOG.color("storage.cp(%r,verb=%d)"%(fname,verb))
  storage.cp(fname,verb=verb)
  storage.ls(verb=verb)
  
  # EXISTS
  LOG.header("exists")
  LOG.color("storage.exists(%r,verb=%d)"%(fname,verb))
  result = storage.exists(fname,verb=verb)
  print ">>> %r"%(result)
  storage.ls(verb=verb)
  
  # RM
  LOG.header("rm")
  LOG.color("storage.rm(%r,verb=%d)"%(fname,verb))
  storage.rm(fname,verb=verb)
  storage.ls(verb=verb)
  
  # MKDIR
  LOG.header("mkdir")
  dirname = 'test'
  LOG.color("storage.mkdir(%r,verb=%d)"%(dirname,verb))
  storage.mkdir(dirname,verb=verb)
  storage.ls(verb=verb)
  storage.ls(dirname,verb=verb)
  
  # RM DIRECTORY
  LOG.header("rm directory")
  submit = raw_input(">>> Careful! Do you really want to remove %r? [y/n] "%(storage.expandpath(dirname,here=True)))
  if submit=='y':
    LOG.color("storage.rm(%r,verb=%d)"%(dirname,verb))
    storage.rm(dirname,verb=verb)
    storage.ls(verb=verb)
  
  # HADD
  LOG.header("hadd")
  infiles = [createdummyroot("testStorage1.root"),createdummyroot("testStorage2.root")]
  outfile = "testStorage.root"
  for tmpdir in [True,]: #False
    LOG.color("storage.hadd(%r,%r,tmpdir=%s,verb=%d)"%(infiles,outfile,tmpdir,verb))
    storage.hadd(infiles,outfile,tmpdir=tmpdir,verb=verb)
    storage.ls(verb=verb)
    storage.rm(outfile,verb=verb)
def _getnevents(self,das=True,refresh=False,tree='Events',limit=-1,checkfiles=False,ncores=0,verb=0):
  """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False)."""
  LOG.verb("_getnevents: das=%r, refresh=%r, tree=%r, limit=%r, checkfiles=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
    das,refresh,tree,limit,checkfiles,self.filelist,len(self.files),len(self.filenevts)),verb,1)
  if self.filelist and not self.files: # get file list from text file for the first time
    self.loadfiles(self.filelist)
  nevents   = self.nevents
  filenevts = self.filenevts
  bar = None
  if nevents<=0 or refresh:
    if checkfiles or (self.storage and not das): # get number of events per file from storage system
      LOG.verb("_getnevents: Get events per file (storage=%r, das=%r)..."%(self.storage,das),verb,2)
      files = self.getfiles(url=True,das=das,refresh=refresh,limit=limit,verb=verb)
      if verb<=0 and len(files)>=5:
        bar = LoadingBar(len(files),width=20,pre=">>> Getting number of events: ",counter=True,remove=True)
      for nevts, fname in iterevts(files,tree,filenevts,refresh,ncores=ncores,verb=verb):
        filenevts[fname] = nevts # cache
        nevents += nevts
        LOG.verb("_getnevents: Found %d events in %r."%(nevts,fname),verb,3)
        if bar:
          if self.nevents>0:
            bar.count("files, %d/%d events (%d%%)"%(nevents,self.nevents,100.0*nevents/self.nevents))
          else:
            bar.count("files, %d events"%(nevents))
    else: # get total number of events from DAS
      LOG.verb("_getnevents: Get total number of events per path (storage=%r, das=%r)..."%(self.storage,das),verb,2)
      for daspath in self.paths:
        nevts = getdasnevents(daspath,instance=self.instance,verb=verb-1)
        LOG.verb("_getnevents: %10d events for %s..."%(nevts,daspath),verb,2)
        nevents += nevts
    if limit<=0:
      self.nevents = nevents
  else:
    LOG.verb("_getnevents: Reusing old number of events (nevents=%r, refresh=%r)..."%(nevents,refresh),verb,2)
  return nevents, filenevts
def __init__(self, group, name, *paths, **kwargs):
    """Container class for CMSSW samples, e.g.:
    - group: DY (used to group similar samples in final output)
    - name:  DYJetsToLL_M-50 (used as shorthand and jobname)
    - path:  /DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM
    - dtype: 'mc', 'data', 'embed'
    """

    # PATH
    LOG.insist(len(paths) >= 1, "Need at least one path to create a sample...")
    if len(paths) == 1 and isinstance(paths[0], list):
        paths = paths[0]
    for path in paths:
        LOG.insist(path.count('/') >= 3 and path.startswith('/'),
                   "DAS path %r has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT." % (path))
        #sample = '/'.join(line.split('/')[-3:])

    # DATA TYPE
    dtype = kwargs.get('dtype', None)
    dtypes = ['mc', 'data', 'embed']
    if dtype == None:  # automatic recognition
        path = paths[0]
        if 'Embed' in path:
            dtype = 'embed'
        elif path.endswith('SIM') or any(g in path for g in ['pythia', 'madgraph']):
            dtype = 'mc'
        elif re.search(r"/Run20\d\d", path):
            dtype = 'data'
        dtype = 'mc'  # TODO: remove
    LOG.insist(dtype in dtypes, "Given data type '%s' is not recognized! Please choose from %s..." % (dtype, ', '.join(dtypes)))

    # ATTRIBUTES
    self.group = group
    self.name = name
    self.paths = paths  # DAS dataset paths
    self.dtype = dtype
    self.channels = kwargs.get('channel', None)
    self.channels = kwargs.get('channels', self.channels)
    self.storage = None
    self.storepath = kwargs.get('store', None)  # if stored elsewhere than DAS
    self.url = kwargs.get('url', None)  # URL if stored elsewhere
    self.dasurl = kwargs.get('dasurl', None) or "root://cms-xrd-global.cern.ch/"  # URL for DAS
    self.blacklist = kwargs.get('blacklist', [])  # blacklisted files
    self.instance = kwargs.get('instance', 'prod/phys03' if path.endswith('USER') else 'prod/global')  # if None, does not exist in DAS
    self.nfilesperjob = kwargs.get('nfilesperjob', -1)  # number of nanoAOD files per job
    self.maxevts = kwargs.get('maxevtsperjob', -1)  # maximum number of events processed per job
    self.maxevts = kwargs.get('maxevts', self.maxevts)  # maximum number of events processed per job
    self.extraopts = kwargs.get('opts', [])  # extra options for analysis module, e.g. ['doZpt=1','tes=1.1']
    self.subtry = kwargs.get('subtry', 0)  # to help keep track of resubmission
    self.jobcfg = kwargs.get('jobcfg', {})  # to help keep track of resubmission
    self.nevents = kwargs.get('nevts', 0)  # number of nanoAOD events that can be processed
    self.nevents = kwargs.get('nevents', self.nevents)  # cache of number of events
    self.files = kwargs.get('files', [])  # list of ROOT files, OR text file with list of files
    self.filenevts = {}  # cache of number of events per file
    self.postfix = kwargs.get('postfix', None) or ""  # post-fix (before '.root') for stored ROOT files
    self.era = kwargs.get('era', "")  # for expansion of $ERA variable
    self.dosplit = kwargs.get('split', len(self.paths) >= 2)  # allow splitting (if multiple DAS datasets)
    self.verbosity = kwargs.get('verbosity', 0)  # verbosity level for debugging
    self.refreshable = not self.files  # allow refresh of file list in getfiles()

    # ENSURE LIST
    if self.channels != None and not isinstance(self.channels, list):
        self.channels = [self.channels]
    if isinstance(self.extraopts, str):
        if ',' in self.extraopts:
            self.extraopts = self.extraopts.split(',')
        else:
            self.extraopts = [self.extraopts]

    # STORAGE & URL DEFAULTS
    if self.storepath:
        self.storepath = repkey(self.storepath, USER=_user, ERA=self.era, GROUP=self.group, SAMPLE=self.name)
        self.storage = getstorage(repkey(self.storepath, PATH=self.paths[0], DAS=self.paths[0]), ensure=False)
    if not self.dasurl:
        self.dasurl = self.url if (self.url in dasurls) else dasurls[0]
    if not self.url:
        if self.storepath:
            if self.storage.__class__.__name__ == 'Local':
                self.url = ""  #root://cms-xrd-global.cern.ch/
            else:
                self.url = self.storage.fileurl
        else:
            self.url = self.dasurl

    # GET FILE LIST FROM TEXT FILE
    if isinstance(self.files, str):
        self.loadfiles(self.files)
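# Usage sketch for this constructor, following the docstring above; the DAS
# path matches the documented format but may not exist, and the 'store' path
# with its $USER/$ERA/$GROUP/$SAMPLE placeholders is hypothetical.
#   sample = Sample('DY','DYJetsToLL_M-50',
#     '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM',
#     dtype='mc',store="/eos/user/$USER/samples/$ERA/$GROUP/$SAMPLE",era='2018')
#   print sample.getfiles(verb=1)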
def loadfiles(self,listname_,**kwargs):
  """Load filenames from text file for fast look up in future."""
  verbosity = LOG.getverbosity(self,kwargs)
  listname  = repkey(listname_,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  LOG.verb("loadfiles: listname=%r -> %r, len(files)=%d, len(filenevts)=%d"%(
    listname_,listname,len(self.files),len(self.filenevts)),verbosity,1)
  filenevts = self.filenevts
  nevents = 0
  #listname = ensurefile(listname,fatal=False)
  filelist = [ ]
  paths = self.paths if '$PATH' in listname else [self.paths[0]]
  for path in paths:
    listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
    if self.verbosity>=1:
      print ">>> Loading sample files from %r..."%(listname_)
    self.pathfiles[path] = [ ]
    if os.path.isfile(listname_):
      skip = False
      subpaths = [ ] # for sanity check
      with open(listname_,'r') as file:
        for line in file:
          line = line.strip().split() # split at spaces to allow comments at the end
          if not line: continue
          line = line[0].strip() # remove spaces, consider only first part of the line
          if line[0]=='#': continue # do not consider commented-out lines
          #if line.endswith('.root'):
          if line.startswith("DASPATH="): # keep track of multiple DAS dataset paths
            path = line.split('=')[-1] # DAS dataset path
            LOG.insist(path.count('/')>=3 and path.startswith('/'),
                       "DAS path %r in %s has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT..."%(path,listname_))
            if path in self.paths: # store file list for this path
              self.pathfiles[path] = [ ]
              subpaths.append(path)
              skip = False
            else: # do not store file list for this path
              skip = True
          else:
            if skip: continue # only load files for this sample's DAS dataset paths
            match = fevtsexp.match(line) # match $FILENAME(:NEVTS)
            if not match: continue
            infile = match.group(1)
            if match.group(2): # found nevents in filename
              nevts = int(match.group(2))
              filenevts[infile] = nevts # store/cache in dictionary
              nevents += nevts
            filelist.append(infile)
            self.pathfiles[path].append(infile)
            if self.verbosity>=3:
              print ">>> %7d events for %s"%(nevts,infile)
      if not filelist:
        LOG.warning("loadfiles: Did not find any files in %s!"%(listname_))
        self.refreshable = True
      else: # sanity check for empty list
        for subpath in subpaths:
          if not self.pathfiles[subpath]:
            LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(subpath,listname_))
    else:
      LOG.warning("loadfiles: file list %s does not exist!"%(listname_))
      self.refreshable = True
  for path in self.paths:
    if path not in self.pathfiles: # nonexistent list
      LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(path,listname))
  if self.nevents<=0:
    self.nevents = nevents
  elif self.nevents!=nevents:
    LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
    self.nevents = nevents
  self.files = filelist
  self.files.sort()
  return self.files
  'list',
  'set',
  'rm',
  'write',
]
for subcmd in subcmds:
    if args[0] in subcmd[:len(args[0])]:  # match abbreviation
        args[0] = subcmd
        break
args = parser.parse_args(args)
if hasattr(args, 'tag') and len(args.tag) >= 1 and args.tag[0] != '_':
    args.tag = '_' + args.tag

# VERBOSITY
if args.verbosity >= 2:
    SLOG.setverbosity(args.verbosity - 1)

# SUBCOMMAND MAINs
os.chdir(CONFIG.basedir)
if args.subcommand == 'install':
    main_install(args)
if args.subcommand == 'list':
    main_list(args)
elif args.subcommand == 'get':
    main_get(args)
elif args.subcommand == 'set':
    main_set(args)
elif args.subcommand == 'write':
    main_write(args)
elif args.subcommand in ['channel', 'era']:
    main_link(args)
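# Self-contained demo of the abbreviation matching above: a user-typed prefix
# is expanded to the first matching subcommand (example values made up).
subcmds = ['channel', 'era', 'get', 'install', 'list', 'set', 'rm', 'write']
args = ['wr', '--verbose']
for subcmd in subcmds:
    if args[0] in subcmd[:len(args[0])]:  # match abbreviation
        args[0] = subcmd
        break
print args  # ['write', '--verbose']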
def testStorage(path, readonly=False, hadd=True, verb=0):

    # INITIALIZE
    LOG.header("__init__")
    #storage = ensuremodule(system,"PicoProducer.storage")
    storage = getstorage(path, ensure=True, verb=verb)
    print ">>> %r" % (storage)
    print ">>> %-10s = %s" % ('path', storage.path)
    print ">>> %-10s = %s" % ('rmcmd', storage.rmcmd)
    print ">>> %-10s = %s" % ('lscmd', storage.lscmd)
    print ">>> %-10s = %s" % ('mkdrcmd', storage.mkdrcmd)
    print ">>> %-10s = %s" % ('cpcmd', storage.cpcmd)
    print ">>> %-10s = %s" % ('tmpdir', storage.tmpdir)
    print ">>> "

    # EXPAND PATH
    LOG.header("expandpath")
    pathargs = [
        ('test.py', ),
        ('$PATH/test.py', ),
        ('foo', 'bar', ),
    ]
    pathkwargs = [
        {'here': True},
        {'here': False},
    ]
    for patharg in pathargs:
        for pathkwarg in pathkwargs:
            LOG.color("storage.expandpath(%s,%s)" % (','.join(repr(a) for a in patharg),
                      ','.join("%s=%r" % (k, v) for k, v in pathkwarg.iteritems())))
            result = storage.expandpath(*patharg, **pathkwarg)
            print ">>> %r" % (result)

    # LS
    LOG.header("ls")
    LOG.color("storage.ls(verb=%d)" % (verb))
    contents = storage.ls(verb=verb)
    print ">>> Found %d items" % (len(contents))
    print ">>> Contents: %s" % (contents)

    # FILES
    LOG.header("getfiles")
    LOG.color("storage.getfiles(verb=%d)" % (verb))
    contents = storage.getfiles(verb=verb)
    print ">>> Found %d items" % (len(contents))
    print ">>> Contents: %s" % (contents)
    print ">>> "
    LOG.color("storage.getfiles(filter='*.*',verb=%d)" % (verb))
    contents = storage.getfiles(filter='*.*', verb=verb)
    print ">>> Found %d files" % (len(contents))
    print ">>> Contents: %s" % (contents)
    print ">>> "
    LOG.color("storage.getfiles(filter='*.*',url=None,verb=%d)" % (verb))
    contents = storage.getfiles(filter='*.*', url=None, verb=verb)
    print ">>> Found %d files" % (len(contents))
    print ">>> Contents: %s" % (contents)

    if readonly:
        print ">>> Read only. Skip test for cp, rm, mkdir, hadd..."
        return

    # CP
    LOG.header("cp")
    fname = createdummy("testStorage.txt")
    LOG.color("storage.cp(%r,verb=%d)" % (fname, verb))
    storage.cp(fname, verb=verb)
    storage.ls(verb=verb)

    # EXISTS
    LOG.header("exists")
    LOG.color("storage.exists(%r,verb=%d)" % (fname, verb))
    result = storage.exists(fname, verb=verb)
    print ">>> Exists: %r" % (result)
    storage.ls(verb=verb)

    # RM
    LOG.header("rm")
    LOG.color("storage.rm(%r,verb=%d)" % (fname, verb))
    try:
        storage.rm(fname, verb=verb)
    except Exception as error:
        print error
    storage.ls(verb=verb)

    # MKDIR
    LOG.header("mkdir")
    dirname = 'test'
    LOG.color("storage.mkdir(%r,verb=%d)" % (dirname, verb))
    try:
        storage.mkdir(dirname, verb=verb)
        storage.ls(verb=verb)
        storage.ls(dirname, here=True, verb=verb)
        result = storage.exists(dirname, verb=verb)
        print ">>> Exists: %r" % (result)
    except Exception as error:
        print error

    # RM DIRECTORY
    LOG.header("rm directory")
    submit = raw_input(">>> Careful! Do you really want to remove %r? [y/n] " % (storage.expandpath(dirname, here=True)))
    if submit == 'y':
        LOG.color("storage.rm(%r,verb=%d)" % (dirname, verb))
        try:
            storage.rm(dirname, verb=verb)
            storage.ls(verb=verb)
        except Exception as error:
            print error

    # HADD
    if hadd:
        LOG.header("hadd")
        infiles = [createdummyroot("testStorage1.root"), createdummyroot("testStorage2.root")]
        outfile = "testStorage.root"
        for tmpdir in [True, ]:  #False
            LOG.color("storage.hadd(%r,%r,tmpdir=%s,verb=%d)" % (infiles, outfile, tmpdir, verb))
            try:
                storage.hadd(infiles, outfile, tmpdir=tmpdir, verb=verb)
                storage.ls(verb=verb)
                storage.rm(outfile, verb=verb)
            except Exception as error:
                print error