import subprocess
import time

import config
from localdb import dbcursor

#htmlDirs = ['/var/www/html', '/afs/cern.ch/user/y/yiiyama/www/metscan']
htmlDirs = ['/var/www/html']

messages = ' <p><span style="color:red;">The system is currently re-scanning the entire dataset.</span></p>\n'
messages += ' <p><a href="nov18/index.html">Status as of November 18</a></p>\n'
messages += ' <p>Golden JSON used is: ' + config.goldenJson + '</p>\n'
messages += ' <p>Silver JSON used is: ' + config.silverJson + '</p>\n'
messages += ' <p>Page last updated: ' + time.asctime() + '</p>'

# numeric value of the 'done' entry of the status enum
dbcursor.execute('SELECT `status`+0 FROM `scanstatus` WHERE `status` LIKE \'done\'')
DONE = dbcursor.fetchall()[0][0]

dbcursor.execute('SELECT `recoid`, `name` FROM `reconstructions` ORDER BY `recoid`')
recos = [(row[0], row[1]) for row in dbcursor]

dbcursor.execute('SELECT `datasetid`, `name` FROM `primarydatasets` ORDER BY `name`')
pds = [(pdid, pdname) for pdid, pdname in dbcursor]

# status[recoid][pdid][run][lumi] -> numeric scan status
status = dict([(reco[0], dict([(pdid, {}) for pdid, name in pds])) for reco in recos])

dbcursor.execute('SELECT `recoid`, `datasetid`, `run`, `lumi`, `status`+0 FROM `scanstatus`')
for recoid, pdid, run, lumi, st in dbcursor:
    if recoid not in status:
        status[recoid] = {}
    if pdid not in status[recoid]:
        status[recoid][pdid] = {}
    if run not in status[recoid][pdid]:
        status[recoid][pdid][run] = {}

    status[recoid][pdid][run][lumi] = st
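# A hedged sketch, not part of the original page generator, of how the
# nested status dict filled above can be summarized per dataset. It assumes
# the value stored at status[recoid][pdid][run][lumi] is the numeric status
# enum compared against the DONE value fetched above:

for recoid, reconame in recos:
    for pdid, pdname in pds:
        sts = [st for lumis in status[recoid][pdid].values() for st in lumis.values()]
        if sts:
            print reconame, pdname, '%.1f%% done' % (100. * sts.count(DONE) / len(sts))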
import config
from das import dasQuery, datasetList
from localdb import dbcursor

### STEP 1 ###################################################
### Find lumisections to be processed from DAS             ###
##############################################################

recoids = {}
for reco in config.reconstructions:
    dbcursor.execute('SELECT `recoid` FROM `reconstructions` WHERE `name` LIKE %s', (reco,))
    if dbcursor.rowcount <= 0:
        # insert new reconstruction version
        dbcursor.execute('INSERT INTO `reconstructions` (name) VALUES (%s)', (reco,))
        recoids[reco] = dbcursor.lastrowid
    else:
        recoids[reco] = dbcursor.fetchall()[0][0]

dbcursor.execute('SELECT `datasetid`, `name` FROM `primarydatasets`')
knownPDs = dict([(name, datasetid) for datasetid, name in dbcursor])

# list of dataset full names (PD + reconstruction version)
# There isn't really a need to query DAS every time; providing a hard-coded dataset list is another option.
datasets = datasetList()

# DCS-only JSON mask: run -> set of good lumisections
dcsMask = {}
for fileName in config.dcsJsons:
    with open(fileName) as dcsJson:
        # certification JSONs are plain dicts of {run string: [[first, last], ...]}
        maskJSON = eval(dcsJson.read())

    for runStr, lumiRanges in maskJSON.items():
        run = int(runStr)
        if run not in dcsMask:
            dcsMask[run] = set()
        for begin, end in lumiRanges:
            dcsMask[run].update(range(begin, end + 1))
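# A hedged sketch (not in the original script) of the membership test the
# DCS-only mask built above is meant to support; the actual filtering of
# lumis fetched from DAS happens further down, outside this excerpt:

def inDcsMask(run, lumi):
    # only lumisections flagged good by the DCS-only JSON should be scanned
    return lumi in dcsMask.get(run, set())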
for reco in config.reconstructions:
    print 'Checking for new lumis in', reco

    recoid = recoids[reco]

    # loop over primary datasets
    for pd, recoVersion in [ds for ds in datasets if ds[1][:ds[1].rfind('-v')] == reco]:
#crabConfig.JobType.outputFiles = ['tags.txt', 'eventdata.txt', 'lumis.txt']
crabConfig.Data.splitting = 'LumiBased'
#crabConfig.Data.totalUnits = 1 # TESTING
crabConfig.Data.outLFNDirBase = config.eosdir.replace('/eos/cms', '') + '/' + timestamp
crabConfig.Site.storageSite = 'T2_CH_CERN'

try:
    os.makedirs(crabConfig.General.workArea)
except OSError:
    # the work area already exists
    pass

for reco in config.reconstructions:
    print 'Creating ntuplizer jobs for', reco

    dbcursor.execute('SELECT `recoid` FROM `reconstructions` WHERE `name` LIKE %s', (reco,))
    recoid = dbcursor.fetchall()[0][0]

    crabConfig.JobType.psetName = config.installdir + '/cmssw/' + config.cmsswbases[reco][1] + '/src/ntuplize.py'

    for pd, datasetid in knownPDs.items():
        # pick up lumis that have never been attempted or whose jobs failed
        dbcursor.execute('SELECT `run`, `lumi` FROM `scanstatus` WHERE `recoid` = %s AND `datasetid` = %s AND (`status` LIKE \'new\' OR `status` LIKE \'failed\') ORDER BY `run`, `lumi`', (recoid, datasetid))
        if dbcursor.rowcount <= 0:
            print ' No job to submit for', pd
            continue

        lumis = [(run, lumi) for run, lumi in dbcursor]

        print ' ' + pd
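# CRAB is pointed at a lumi selection through a lumi-mask JSON
# (run number -> list of [first, last] lumi ranges). How the author hands
# the list collected above over to CRAB is not shown in this fragment;
# below is a hedged sketch of the standard compression of sorted
# (run, lumi) pairs into that format, whose output could be written to a
# file and referenced from crabConfig.Data.lumiMask:

def makeLumiMask(lumis):
    # lumis: (run, lumi) pairs already ordered by run, then lumi
    mask = {}
    for run, lumi in lumis:
        ranges = mask.setdefault(str(run), [])
        if ranges and ranges[-1][1] == lumi - 1:
            ranges[-1][1] = lumi  # extend the current contiguous range
        else:
            ranges.append([lumi, lumi])  # start a new range
    return mask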
            for fname in os.listdir('/'.join((config.scratchdir, 'merged', reco, pd))):
                if fname.endswith('.root'):
                    sourcePaths[reco][pd].append('/'.join((config.scratchdir, 'merged', reco, pd, fname)))
                    nFiles += 1
                    if NMAX > 0 and nFiles > NMAX:
                        raise MaxFiles

except MaxFiles:
    pass

for reco in sourcePaths.keys():
    dbcursor.execute('SELECT `recoid` FROM `reconstructions` WHERE `name` LIKE %s', (reco,))
    try:
        recoid = dbcursor.fetchall()[0][0]
    except IndexError:
        # reconstruction version not registered in the DB; skip
        continue

    for pd, paths in sourcePaths[reco].items():
        dbcursor.execute('SELECT `datasetid` FROM `primarydatasets` WHERE `name` LIKE %s', (pd,))
        try:
            datasetid = dbcursor.fetchall()[0][0]
        except IndexError:
            # primary dataset not registered in the DB; skip
            continue

        for sourcePath in paths:
            print 'Analyzing', sourcePath

            status = dumper.dump(sourcePath, recoid, datasetid)
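# The file-collection loop above relies on an NMAX throttle and a MaxFiles
# exception that are defined earlier in the script, outside this excerpt.
# A minimal sketch of what those definitions presumably look like:

NMAX = 0  # maximum number of files to analyze per pass; 0 means no limit

class MaxFiles(Exception):
    # raised purely to break out of the nested file-collection loops
    pass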