def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder.

    Finds all run/primds pairs that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now
      AND run.end_time > 0 )

    For each released run/dataset it records the reco configuration in the
    database, builds a PromptReco WMSpec plus its WMBS subscription, and
    commits everything in a single database transaction at the end.

    :param tier0Config: Tier0 configuration object (provides per-dataset
                        reco settings and global defaults)
    :param specDirectory: directory where workflow sandboxes/specs are cached
    :param dqmUploadProxy: proxy passed into the spec for DQM uploads
    :raises RuntimeError: if the final database transaction fails
                          (the transaction is rolled back first)
    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # DAOs used to persist the reco release state and configuration
    insertDatasetScenarioDAO = daoFactory(
        classname="RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(
        classname="RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname="RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    insertPhEDExConfigDAO = daoFactory(
        classname="RunConfig.InsertPhEDExConfig")
    releasePromptRecoDAO = daoFactory(classname="RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(
        classname="RunConfig.InsertWorkflowMonitoring")

    # bind lists accumulated over all runs/datasets, executed in one
    # transaction at the bottom of this function
    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    # NOTE(review): bindsStorageNode is never appended to in this function,
    # so the insertStorageNodeDAO call below can never fire — confirm
    # whether storage-node binds were meant to be collected here.
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(
        classname="Workflow.MarkInjectedWorkflows")

    #
    # for creating PromptReco specs
    # maps workflowName -> (wmbsHelper, wmSpec, fileset)
    #
    recoSpecs = {}

    #
    # for PhEDEx subscription settings
    # NOTE(review): this list accumulates across ALL datasets and runs in
    # the loop below, and every wmSpec receives the full accumulated list —
    # confirm this cross-dataset sharing is intended rather than per-dataset.
    #
    subscriptions = []

    findRecoReleaseDAO = daoFactory(classname="RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction=False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
        runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

        # retrieve phedex configs for run
        getPhEDExConfigDAO = daoFactory(classname="RunConfig.GetPhEDExConfig")
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction=False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            # record the release time for this run/dataset
            bindsReleasePromptReco.append({
                'RUN': run,
                'PRIMDS': dataset,
                'NOW': int(time.time())
            })

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': dataset,
                'SCENARIO': datasetConfig.Scenario
            })

            if datasetConfig.CMSSWVersion != None:
                bindsCMSSWVersion.append(
                    {'VERSION': datasetConfig.CMSSWVersion})

            # flatten skim/sequence lists into comma-separated DB columns
            alcaSkim = None
            if len(datasetConfig.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.DqmSequences)

            # resolve ScramArch from the CMSSW version, falling back to the
            # configured global default
            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(
                datasetConfig.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append({
                'RUN': run,
                'PRIMDS': dataset,
                'DO_RECO': int(datasetConfig.DoReco),
                'RECO_SPLIT': datasetConfig.RecoSplit,
                'WRITE_RECO': int(datasetConfig.WriteRECO),
                'WRITE_DQM': int(datasetConfig.WriteDQM),
                'WRITE_AOD': int(datasetConfig.WriteAOD),
                'PROC_VER': datasetConfig.ProcessingVersion,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq,
                'BLOCK_DELAY': datasetConfig.BlockCloseDelay,
                'CMSSW': datasetConfig.CMSSWVersion,
                'SCRAM_ARCH': datasetConfig.ScramArch,
                'MULTICORE': datasetConfig.Multicore,
                'GLOBAL_TAG': datasetConfig.GlobalTag
            })

            phedexConfig = phedexConfigs[dataset]

            # one PhEDEx subscription per produced data tier, driven by the
            # tape/disk nodes configured for this dataset
            if datasetConfig.WriteAOD:
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if phedexConfig['tape_node'] != None:
                    custodialSites.append(phedexConfig['tape_node'])
                if phedexConfig['disk_node'] != None:
                    nonCustodialSites.append(phedexConfig['disk_node'])
                    autoApproveSites.append(phedexConfig['disk_node'])
                subscriptions.append({
                    'custodialSites': custodialSites,
                    'custodialSubType': "Replica",
                    'nonCustodialSites': nonCustodialSites,
                    'autoApproveSites': autoApproveSites,
                    'priority': "high",
                    'primaryDataset': dataset,
                    'dataTier': "AOD"
                })

            if len(datasetConfig.AlcaSkims) > 0:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "ALCARECO"
                    })

            if datasetConfig.WriteDQM:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': tier0Config.Global.DQMDataTier
                    })

            # RECO goes to disk only (no custodialSubType key here)
            if datasetConfig.WriteRECO:
                if phedexConfig['disk_node'] != None:
                    subscriptions.append({
                        'custodialSites': [],
                        'nonCustodialSites': [phedexConfig['disk_node']],
                        'autoApproveSites': [phedexConfig['disk_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RECO"
                    })

            # collect the output tiers the PromptReco workflow should write
            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append(tier0Config.Global.DQMDataTier)
            if len(datasetConfig.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            if datasetConfig.DoReco and len(writeTiers) > 0:

                #
                # create WMSpec
                #
                taskName = "Reco"
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)
                specArguments = {}

                # resource defaults; Memory scales with Multicore if set
                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800

                if datasetConfig.Multicore:
                    specArguments['Multicore'] = datasetConfig.Multicore
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore

                specArguments['RequestPriority'] = 0

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch

                specArguments['RunNumber'] = run

                specArguments['SplittingAlgo'] = "EventBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit

                specArguments['ProcessingString'] = "PromptReco"
                specArguments[
                    'ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario
                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments[
                    'GlobalTagConnect'] = datasetConfig.GlobalTagConnect

                # input is the RAW dataset produced by Repack for this run
                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (
                    dataset, runInfo['acq_era'], repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences

                specArguments[
                    'UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[
                        'bulk_data_type']
                # backfill runs write to a separate /store/backfill area
                if runInfo['backfill']:
                    specArguments[
                        'MergedLFNBase'] = "/store/backfill/%s/%s" % (
                            runInfo['backfill'], runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo[
                        'bulk_data_type']

                specArguments['ValidStatus'] = "VALID"

                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                specArguments[
                    'BlockCloseDelay'] = datasetConfig.BlockCloseDelay

                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"

                factory = PromptRecoWorkloadFactory()
                wmSpec = factory.factoryWorkloadConstruction(
                    workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                # attach all subscriptions accumulated so far (see NOTE above)
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails(
                    "*****@*****.**", "T0", {
                        'vogroup': 'DEFAULT',
                        'vorole': 'DEFAULT',
                        'dn': "*****@*****.**"
                    })

                wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                                  maxVSize=10485760,
                                                  softTimeout=604800,
                                                  gracePeriod=3600)

                wmbsHelper = WMBSHelper(wmSpec,
                                        taskName,
                                        cachepath=specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    # commit all accumulated binds and create the subscriptions in a single
    # transaction; any failure rolls everything back
    try:
        myThread.transaction.begin()
        if len(bindsDatasetScenario) > 0:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
        if len(bindsCMSSWVersion) > 0:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
        if len(bindsRecoConfig) > 0:
            insertRecoConfigDAO.execute(bindsRecoConfig,
                                        conn=myThread.transaction.conn,
                                        transaction=True)
        if len(bindsStorageNode) > 0:
            insertStorageNodeDAO.execute(bindsStorageNode,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        if len(bindsReleasePromptReco) > 0:
            releasePromptRecoDAO.execute(bindsReleasePromptReco,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        # taskName is always "Reco" here; it is only read when recoSpecs
        # is non-empty, i.e. after at least one spec set it above
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                          Fileset(id=fileset),
                                          alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute(
                [fileset],
                conn=myThread.transaction.conn,
                transaction=True)
        if len(recoSpecs) > 0:
            markWorkflowsInjectedDAO.execute(recoSpecs.keys(),
                                             injected=True,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError(
            "Problem in releasePromptReco() database transaction !")
    else:
        myThread.transaction.commit()

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part of the configuration
    relevant to run/stream and write it to the database.

    Create workflows, filesets and subscriptions for the processing
    of runs/streams: a Repack workflow for "Bulk" streams, an Express
    workflow for "Express" streams; "Ignore" streams only get their
    processing style recorded. All database writes happen in a single
    transaction at the end.

    :param tier0Config: Tier0 configuration object
    :param run: run number
    :param stream: stream name
    :param specDirectory: directory where workflow sandboxes/specs are cached
    :param dqmUploadProxy: proxy passed into the Express spec for DQM uploads
    :raises RuntimeError: if the stream has no datasets in the HLT menu, or
                          if the database transaction fails (rolled back)
    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    if runInfo['hltkey'] != None:

        # streams not explicitely configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(
                classname="RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run,
                                                    stream,
                                                    transaction=False)
            if len(datasets) == 0:
                raise RuntimeError(
                    "Stream is not defined in HLT menu or has no datasets !")

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(
            classname="RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(
            classname="RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(
            classname="RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(
            classname="RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(
            classname="RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(
            classname="RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(
            classname="RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(
            classname="RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(
            classname="RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(
            classname="RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(
            classname="RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(
            classname="RunConfig.InsertPhEDExConfig")

        # bind containers, all executed in one transaction at the end;
        # lists hold multi-row binds, dicts hold single-row binds
        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {
            'RUN': run,
            'STREAM': stream,
            'STYLE': streamConfig.ProcessingStyle
        }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(
            classname="Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(
            classname="RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run,
                                                          stream,
                                                          transaction=False)

        if streamConfig.ProcessingStyle == "Bulk":

            # the online CMSSW version can be overridden per stream config
            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Repack.CMSSWVersion})

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Repack.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Repack.ProcessingVersion,
                'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi,
                'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi,
                'MIN_SIZE': streamConfig.Repack.MinInputSize,
                'MAX_SIZE': streamConfig.Repack.MaxInputSize,
                'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize,
                'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize,
                'MAX_EVENTS': streamConfig.Repack.MaxInputEvents,
                'MAX_FILES': streamConfig.Repack.MaxInputFiles,
                'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay,
                'CMSSW': streamConfig.Repack.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Repack.ScramArch
            }

        elif streamConfig.ProcessingStyle == "Express":

            # express output that is not tied to an HLT dataset goes into a
            # per-stream special dataset
            specialDataset = "Stream%s" % stream
            bindsDataset.append({'PRIMDS': specialDataset})
            bindsStreamDataset.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'STREAM': stream
            })
            bindsSpecialDataset = {'STREAM': stream, 'PRIMDS': specialDataset}
            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'SCENARIO': streamConfig.Express.Scenario
            })

            if streamConfig.Express.WriteDQM:
                outputModuleDetails.append({
                    'dataTier': tier0Config.Global.DQMDataTier,
                    'eventContent': tier0Config.Global.DQMDataTier,
                    'primaryDataset': specialDataset
                })

            bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode})

            bindsPhEDExConfig.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'ARCHIVAL_NODE': None,
                'TAPE_NODE': None,
                'DISK_NODE': expressPhEDExSubscribeNode
            })

            subscriptions.append({
                'custodialSites': [],
                'nonCustodialSites': [expressPhEDExSubscribeNode],
                'autoApproveSites': [expressPhEDExSubscribeNode],
                'priority': "high",
                'primaryDataset': specialDataset
            })

            alcaSkim = None
            if len(streamConfig.Express.AlcaSkims) > 0:
                outputModuleDetails.append({
                    'dataTier': "ALCARECO",
                    'eventContent': "ALCARECO",
                    'primaryDataset': specialDataset
                })
                alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                # count PromptCalibProd* producers for the calibration record
                numPromptCalibProd = 0
                for producer in streamConfig.Express.AlcaSkims:
                    if producer.startswith("PromptCalibProd"):
                        numPromptCalibProd += 1

                if numPromptCalibProd > 0:
                    bindsPromptCalibration = {
                        'RUN': run,
                        'STREAM': stream,
                        'NUM_PRODUCER': numPromptCalibProd
                    }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Express.CMSSWVersion})

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Express.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            # the reco step of express may run a different CMSSW release
            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:
                bindsCMSSWVersion.append(
                    {'VERSION': streamConfig.Express.RecoCMSSWVersion})
                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(
                    streamConfig.Express.RecoCMSSWVersion,
                    tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Express.ProcessingVersion,
                'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers),
                'WRITE_DQM': streamConfig.Express.WriteDQM,
                'GLOBAL_TAG': streamConfig.Express.GlobalTag,
                'MAX_RATE': streamConfig.Express.MaxInputRate,
                'MAX_EVENTS': streamConfig.Express.MaxInputEvents,
                'MAX_SIZE': streamConfig.Express.MaxInputSize,
                'MAX_FILES': streamConfig.Express.MaxInputFiles,
                'MAX_LATENCY': streamConfig.Express.MaxLatency,
                'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval,
                'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay,
                'CMSSW': streamConfig.Express.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Express.ScramArch,
                'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion,
                'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch,
                'MULTICORE': streamConfig.Express.Multicore,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq
            }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(
            classname="RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(
            run, stream, transaction=False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                # hardcoded skips for specific historical runs with
                # unassigned trigger paths
                if stream == "Express" and run in [
                        210114, 210116, 210120, 210121, 210178
                ]:
                    continue
                if stream == "A" and run in [216120, 216125, 216130]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            # build the SelectEvents list from the trigger paths
            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[
                    datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append({
                    'dataTier': "RAW",
                    'eventContent': "ALL",
                    'selectEvents': selectEvents,
                    'primaryDataset': dataset
                })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': datasetConfig.ArchivalNode,
                    'TAPE_NODE': datasetConfig.TapeNode,
                    'DISK_NODE': datasetConfig.DiskNode
                })

                # RAW subscription: archival + tape are custodial, disk is not
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append(
                        {'NODE': datasetConfig.ArchivalNode})
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.TapeNode})
                    custodialSites.append(datasetConfig.TapeNode)
                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.DiskNode})
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RAW"
                    })

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': "%s-Error" % dataset,
                        'dataTier': "RAW"
                    })

            elif streamConfig.ProcessingStyle == "Express":

                # ALCARECO/DQM tiers go to the special dataset, not here
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in ["ALCARECO", "DQM", "DQMIO"]:
                        outputModuleDetails.append({
                            'dataTier': dataTier,
                            'eventContent': dataTier,
                            'selectEvents': selectEvents,
                            'primaryDataset': dataset
                        })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': None,
                    'TAPE_NODE': None,
                    'DISK_NODE': expressPhEDExSubscribeNode
                })

                subscriptions.append({
                    'custodialSites': [],
                    'nonCustodialSites': [expressPhEDExSubscribeNode],
                    'autoApproveSites': [expressPhEDExSubscribeNode],
                    'priority': "high",
                    'primaryDataset': dataset
                })

        #
        # finally create WMSpec
        #
        # NOTE(review): `outputs` is assigned but never used below — likely
        # leftover from an earlier version; confirm before removing.
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":
            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments[
                'ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments[
                'MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments[
                'MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments[
                'MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[
                'bulk_data_type']
            # backfill runs write to a separate /store/backfill area
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (
                    runInfo['backfill'], runInfo['bulk_data_type'])
            else:
                specArguments[
                    'MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments[
                'BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":
            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            # resource defaults; Memory scales with Multicore if set
            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments[
                'ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments[
                'RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments[
                'GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments[
                'MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            # NOTE(review): this assignment is immediately overwritten by
            # both branches of the if/else below — dead store, but harmless.
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments[
                    'MergedLFNBase'] = "/store/backfill/%s/express" % runInfo[
                        'backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments[
                'PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments[
                'BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        # arguments common to both Repack and Express specs
        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments[
                'OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            wmSpec.setOwnerDetails(
                "*****@*****.**", "T0", {
                    'vogroup': 'DEFAULT',
                    'vorole': 'DEFAULT',
                    'dn': "*****@*****.**"
                })

            wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                              maxVSize=10485760,
                                              softTimeout=604800,
                                              gracePeriod=3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(
                    bindsPromptCalibration,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset,
                                                conn=myThread.transaction.conn,
                                                transaction=True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(
                    bindsDatasetScenario,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            # always record the processing style, even for "Ignore" streams
            insertStreamStyleDAO.execute(bindsStreamStyle,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
                insertStreamFilesetDAO.execute(run,
                                               stream,
                                               filesetName,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                              fileset,
                                              alternativeFilesetClose=True)
                insertWorkflowMonitoringDAO.execute(
                    [fileset.id],
                    conn=myThread.transaction.conn,
                    transaction=True)
            if streamConfig.ProcessingStyle == "Bulk":
                # record per-dataset PromptReco release delays keyed by the
                # merge output filesets of the repack workflow
                bindsRecoReleaseConfig = []
                # NOTE(review): this loop variable shadows the Fileset object
                # `fileset` created above — here it is a fileset id from the
                # merge-output mapping; confirm the shadowing is intentional.
                for fileset, primds in wmbsHelper.getMergeOutputMapping(
                ).items():
                    bindsRecoReleaseConfig.append({
                        'RUN': run,
                        'PRIMDS': primds,
                        'FILESET': fileset,
                        'RECODELAY': promptRecoDelay[primds],
                        'RECODELAYOFFSET': promptRecoDelayOffset[primds]
                    })
                insertRecoReleaseConfigDAO.execute(
                    bindsRecoReleaseConfig,
                    conn=myThread.transaction.conn,
                    transaction=True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute(
                    [workflowName],
                    injected=True,
                    conn=myThread.transaction.conn,
                    transaction=True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError(
                "Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass

    return
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder.

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now
      AND run.end_time > 0 )

    Creates workflows and subscriptions for the processing
    of runs/datasets.

    :param tier0Config: Tier0 configuration object; supplies per-dataset
                        reco settings and the global ScramArch mapping
    :param specDirectory: directory in which created workload specs are cached
    :param dqmUploadProxy: proxy handed to the workload for DQM uploads
    :raises RuntimeError: if the database transaction fails (after rollback)
    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(classname="RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname="RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    releasePromptRecoDAO = daoFactory(classname="RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(classname="RunConfig.InsertWorkflowMonitoring")

    # loop-invariant DAOs, hoisted out of the per-run loop below
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    getPhEDExConfigDAO = daoFactory(classname="RunConfig.GetPhEDExConfig")

    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    # NOTE(review): nothing in this function appends to bindsStorageNode,
    # so the insertStorageNodeDAO call in the transaction below is
    # currently a no-op -- kept for safety, confirm before removing.
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(classname="Workflow.MarkInjectedWorkflows")

    # for creating PromptReco specs:
    #   workflowName -> (wmbsHelper, wmSpec, fileset)
    recoSpecs = {}

    # for PhEDEx subscription settings
    subscriptions = []

    # the single top-level task of a PromptReco workload; defined once here
    # instead of relying on for-loop variable leakage as before
    taskName = "Reco"

    findRecoReleaseDAO = daoFactory(classname="RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction=False)

    for run in sorted(recoRelease):

        # retrieve some basic run information
        runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

        # retrieve phedex configs for run
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction=False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            bindsReleasePromptReco.append({'RUN': run,
                                           'PRIMDS': dataset,
                                           'NOW': int(time.time())})

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append({'RUN': run,
                                         'PRIMDS': dataset,
                                         'SCENARIO': datasetConfig.Scenario})

            if datasetConfig.CMSSWVersion is not None:
                bindsCMSSWVersion.append({'VERSION': datasetConfig.CMSSWVersion})

            # comma-separated skim/sequence lists, NULL (None) when empty
            alcaSkim = ",".join(datasetConfig.AlcaSkims) if datasetConfig.AlcaSkims else None
            dqmSeq = ",".join(datasetConfig.DqmSequences) if datasetConfig.DqmSequences else None

            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(datasetConfig.CMSSWVersion,
                                                                         tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append({'RUN': run,
                                    'PRIMDS': dataset,
                                    'DO_RECO': int(datasetConfig.DoReco),
                                    'RECO_SPLIT': datasetConfig.RecoSplit,
                                    'WRITE_RECO': int(datasetConfig.WriteRECO),
                                    'WRITE_DQM': int(datasetConfig.WriteDQM),
                                    'WRITE_AOD': int(datasetConfig.WriteAOD),
                                    'PROC_VER': datasetConfig.ProcessingVersion,
                                    'ALCA_SKIM': alcaSkim,
                                    'DQM_SEQ': dqmSeq,
                                    'BLOCK_DELAY': datasetConfig.BlockCloseDelay,
                                    'CMSSW': datasetConfig.CMSSWVersion,
                                    'SCRAM_ARCH': datasetConfig.ScramArch,
                                    'MULTICORE': datasetConfig.Multicore,
                                    'GLOBAL_TAG': datasetConfig.GlobalTag})

            phedexConfig = phedexConfigs[dataset]

            # AOD: custodial tape copy plus auto-approved disk copy
            if datasetConfig.WriteAOD:
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if phedexConfig['tape_node'] is not None:
                    custodialSites.append(phedexConfig['tape_node'])
                if phedexConfig['disk_node'] is not None:
                    nonCustodialSites.append(phedexConfig['disk_node'])
                    autoApproveSites.append(phedexConfig['disk_node'])
                subscriptions.append({'custodialSites': custodialSites,
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': nonCustodialSites,
                                      'autoApproveSites': autoApproveSites,
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "AOD"})

            # ALCARECO: custodial tape copy only
            if datasetConfig.AlcaSkims and phedexConfig['tape_node'] is not None:
                subscriptions.append({'custodialSites': [phedexConfig['tape_node']],
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': [],
                                      'autoApproveSites': [],
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "ALCARECO"})

            # DQM: custodial tape copy only
            if datasetConfig.WriteDQM and phedexConfig['tape_node'] is not None:
                subscriptions.append({'custodialSites': [phedexConfig['tape_node']],
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': [],
                                      'autoApproveSites': [],
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "DQM"})

            # RECO: auto-approved disk copy only
            if datasetConfig.WriteRECO and phedexConfig['disk_node'] is not None:
                subscriptions.append({'custodialSites': [],
                                      'nonCustodialSites': [phedexConfig['disk_node']],
                                      'autoApproveSites': [phedexConfig['disk_node']],
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "RECO"})

            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append("DQM")
            if datasetConfig.AlcaSkims:
                writeTiers.append("ALCARECO")

            # only build a workload if reco is enabled and produces output
            if datasetConfig.DoReco and writeTiers:

                #
                # create WMSpec
                #
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = {}
                specArguments['Group'] = "unknown"
                specArguments['Requestor'] = "unknown"
                specArguments['RequestorDN'] = "unknown"
                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800
                if datasetConfig.Multicore:
                    specArguments['Multicore'] = datasetConfig.Multicore
                    # scale the memory request with the number of cores
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore
                specArguments['RequestPriority'] = 0
                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch
                specArguments['RunNumber'] = run
                specArguments['SplittingAlgo'] = "EventBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit
                specArguments['ProcessingString'] = "PromptReco"
                specArguments['ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario
                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments['GlobalTagConnect'] = datasetConfig.GlobalTagConnect
                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (dataset, runInfo['acq_era'], repackProcVer)
                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences
                specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
                if runInfo['backfill']:
                    specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                                runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']
                specArguments['ValidStatus'] = "VALID"
                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
                specArguments['BlockCloseDelay'] = datasetConfig.BlockCloseDelay
                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"
                # not used, but needed by the validation
                specArguments['CouchURL'] = "http://*****:*****@cern.ch"

                # NOTE(review): the original source was redacted between the
                # CouchURL assignment and the setOwnerDetails() call; the
                # factory construction, PhEDEx injection override and the
                # subscription loop below were reconstructed from the
                # parallel Repack/Express code path -- confirm against
                # version control before relying on them.
                factory = PromptRecoWorkloadFactory()
                wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                       {'vogroup': 'DEFAULT',
                                        'vorole': 'DEFAULT',
                                        'dn': "*****@*****.**"})

                wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                                  maxVSize=10485760,
                                                  softTimeout=604800,
                                                  gracePeriod=3600)

                wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    # single transaction for all collected binds and subscriptions
    try:
        myThread.transaction.begin()
        if bindsDatasetScenario:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
        if bindsCMSSWVersion:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
        if bindsRecoConfig:
            insertRecoConfigDAO.execute(bindsRecoConfig,
                                        conn=myThread.transaction.conn,
                                        transaction=True)
        if bindsStorageNode:
            insertStorageNodeDAO.execute(bindsStorageNode,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        if bindsReleasePromptReco:
            releasePromptRecoDAO.execute(bindsReleasePromptReco,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                          Fileset(id=fileset),
                                          alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute([fileset],
                                                conn=myThread.transaction.conn,
                                                transaction=True)
        if recoSpecs:
            # list() so the binds are a real sequence under Python 3 as well
            markWorkflowsInjectedDAO.execute(list(recoSpecs.keys()),
                                             injected=True,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError("Problem in releasePromptReco() database transaction !")
    else:
        myThread.transaction.commit()

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions
    for the processing of runs/streams.

    :param tier0Config: Tier0 configuration object (stream/dataset settings,
                        global ScramArch mapping)
    :param run: run number being configured
    :param stream: stream name within the run
    :param specDirectory: directory in which created workload specs are cached
    :param dqmUploadProxy: proxy handed to the Express workload for DQM upload
    :raises RuntimeError: if the stream has no datasets or the database
                          transaction fails (after rollback)
    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    #
    if runInfo['hltkey'] != None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False)
            if len(datasets) == 0:
                raise RuntimeError("Stream is not defined in HLT menu or has no datasets !")

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        # bind lists/dicts for the single database transaction at the end;
        # dict-typed binds stay empty ({}) when the style doesn't apply
        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            # NOTE: streamConfig is mutated in place here (CMSSWVersion,
            # ScramArch) -- later code relies on these resolved values
            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } )

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion,
                                                                               tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles,
                                  'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay,
                                  'CMSSW' : streamConfig.Repack.CMSSWVersion,
                                  'SCRAM_ARCH' : streamConfig.Repack.ScramArch }

        elif streamConfig.ProcessingStyle == "Express":

            # express output goes into a per-stream special dataset
            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'ARCHIVAL_NODE' : None,
                                        'TAPE_NODE' : None,
                                        'DISK_NODE' : expressPhEDExSubscribeNode } )

            subscriptions.append( { 'custodialSites' : [],
                                    'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                    'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                    'priority' : "high",
                                    'primaryDataset' : specialDataset } )

            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    # count PromptCalibProd* producers for prompt calibration bookkeeping
                    numPromptCalibProd = 0
                    for producer in streamConfig.Express.AlcaSkims:
                        if producer.startswith("PromptCalibProd"):
                            numPromptCalibProd += 1

                    if numPromptCalibProd > 0:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream,
                                                   'NUM_PRODUCER' : numPromptCalibProd }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } )

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion,
                                                                                tier0Config.Global.DefaultScramArch)

            # optional separate CMSSW release for the express reco step
            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:

                bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } )

                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion,
                                                                                        tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_RATE' : streamConfig.Express.MaxInputRate,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval,
                                   'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay,
                                   'CMSSW' : streamConfig.Express.CMSSWVersion,
                                   'SCRAM_ARCH' : streamConfig.Express.ScramArch,
                                   'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion,
                                   'RECO_SCRAM_ARCH' : streamConfig.Express.RecoScramArch,
                                   'MULTICORE' : streamConfig.Express.Multicore,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            # hardcoded skip list for specific runs with known
            # unassigned-path problems -- historical exceptions
            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue
                if stream == "A" and run in [ 216120, 216125, 216130 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            # trigger path selection strings for the output module
            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : datasetConfig.ArchivalNode,
                                            'TAPE_NODE' : datasetConfig.TapeNode,
                                            'DISK_NODE' : datasetConfig.DiskNode } )

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } )
                    custodialSites.append(datasetConfig.TapeNode)

                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } )
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "RAW" } )

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                # NOTE(review): this is also True when ArchivalNode is None,
                # so error datasets without an archival node still get a
                # non-custodial copy at the express inject node -- presumably
                # intentional, confirm
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : "%s-Error" % dataset,
                                            'dataTier' : "RAW" } )

            elif streamConfig.ProcessingStyle == "Express":

                # ALCARECO and DQM are handled at stream level above
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:

                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : None,
                                            'TAPE_NODE' : None,
                                            'DISK_NODE' : expressPhEDExSubscribeNode } )

                subscriptions.append( { 'custodialSites' : [],
                                        'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                        'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                        'priority' : "high",
                                        'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        # NOTE(review): 'outputs' appears unused in the rest of this
        # function -- candidate for removal
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}
            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"
            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800
            specArguments['RequestPriority'] = 0
            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch
            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles
            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                            runInfo['bulk_data_type'])
            else:
                specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']
            specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}
            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"
            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                # scale the memory request with the number of cores
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0
            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario
            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch
            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect
            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset
            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            # NOTE: default assignment is immediately overwritten by the
            # if/else below -- kept for byte-compatibility
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"
            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval
            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        # arguments common to both Repack and Express workloads
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT',
                                     'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760,
                                              maxVSize = 10485760,
                                              softTimeout = 604800,
                                              gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            # stream style is always written, even for "Ignore" streams
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                # NOTE(review): this loop rebinds 'fileset' (previously the
                # Fileset object) to the mapping key; 'fileset' is not used
                # afterwards, but beware when extending this code
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError("Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass

    return
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy = None):
    """
    _releasePromptReco_

    Called by Tier0Feeder

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now
      AND run.end_time > 0 )

    Create workflows and subscriptions for the processing
    of runs/datasets.

    :param tier0Config: Tier0 configuration object (dataset/stream settings)
    :param specDirectory: directory used as WMBSHelper spec cache path
    :param dqmUploadProxy: proxy passed through to the PromptReco spec
                           as 'DQMUploadProxy' (may be None)
    :returns: None; all state changes go to the database in one transaction
    :raises RuntimeError: if a Tier1 skim is configured without a skim node
                          and without a custodial site fallback
    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname = "RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
    insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")
    insertPromptSkimConfigDAO = daoFactory(classname = "RunConfig.InsertPromptSkimConfig")
    releasePromptRecoDAO = daoFactory(classname = "RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")

    # bind lists accumulated over all runs/datasets, inserted in one
    # transaction at the end so a failure releases nothing
    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    bindsStorageNode = []
    bindsPromptSkimConfig = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

    #
    # for creating PromptReco specs
    # maps workflowName -> (wmbsHelper, wmSpec, fileset)
    #
    recoSpecs = {}

    # single task name used for every PromptReco spec created below
    # (previously the commit loop relied on this name leaking out of the
    # per-dataset loop, which breaks if no spec is ever created)
    taskName = "Reco"

    #
    # for PhEDEx subscription settings
    #
    subscriptions = []

    findRecoReleaseDAO = daoFactory(classname = "RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction = False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
        runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

        # retrieve phedex configs for run
        getPhEDExConfigDAO = daoFactory(classname = "RunConfig.GetPhEDExConfig")
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction = False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            # record the release timestamp for this run/dataset
            bindsReleasePromptReco.append( { 'RUN' : run,
                                             'PRIMDS' : dataset,
                                             'NOW' : int(time.time()) } )

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : dataset,
                                           'SCENARIO' : datasetConfig.Scenario } )

            bindsCMSSWVersion.append( { 'VERSION' : datasetConfig.Reco.CMSSWVersion } )

            # comma-joined skim/sequence lists, NULL (None) when unconfigured
            alcaSkim = None
            if len(datasetConfig.Reco.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.Reco.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.Reco.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.Reco.DqmSequences)

            bindsRecoConfig.append( { 'RUN' : run,
                                      'PRIMDS' : dataset,
                                      'DO_RECO' : int(datasetConfig.Reco.DoReco),
                                      'CMSSW' : datasetConfig.Reco.CMSSWVersion,
                                      'RECO_SPLIT' : datasetConfig.Reco.EventSplit,
                                      'WRITE_RECO' : int(datasetConfig.Reco.WriteRECO),
                                      'WRITE_DQM' : int(datasetConfig.Reco.WriteDQM),
                                      'WRITE_AOD' : int(datasetConfig.Reco.WriteAOD),
                                      'PROC_VER' : datasetConfig.Reco.ProcessingVersion,
                                      'ALCA_SKIM' : alcaSkim,
                                      'DQM_SEQ' : dqmSeq,
                                      'GLOBAL_TAG' : datasetConfig.Reco.GlobalTag } )

            phedexConfig = phedexConfigs[dataset]

            # split the configured nodes into custodial / non-custodial
            # and auto-approve lists for the PhEDEx subscription
            custodialSites = []
            nonCustodialSites = []
            autoApproveSites = []
            for node, config in phedexConfig.items():
                if config['custodial'] == 1:
                    custodialSites.append(node)
                else:
                    nonCustodialSites.append(node)
                if config['request_only'] == "n":
                    autoApproveSites.append(node)

            if len(custodialSites) + len(nonCustodialSites) > 0:
                # NOTE(review): 'config' here is the leaked loop variable from
                # the node loop above, so 'priority' is taken from whichever
                # node happened to iterate last — confirm this is intended
                subscriptions.append( { 'custodialSites' : custodialSites,
                                        'nonCustodialSites' : nonCustodialSites,
                                        'autoApproveSites' : autoApproveSites,
                                        'priority' : config['priority'],
                                        'primaryDataset' : dataset } )

            for tier1Skim in datasetConfig.Tier1Skims:

                bindsCMSSWVersion.append( { 'VERSION' : tier1Skim.CMSSWVersion } )

                # fall back to the dataset's custodial node when the skim
                # does not name a node of its own
                if tier1Skim.Node is None:
                    tier1Skim.Node = datasetConfig.CustodialNode
                else:
                    bindsStorageNode.append( { 'NODE' : tier1Skim.Node } )
                if tier1Skim.Node is None:
                    # py2/py3-compatible raise (was py2-only "raise E, msg" syntax)
                    raise RuntimeError("Configured a skim without providing a skim node or a custodial site\n")

                bindsPromptSkimConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'TIER' : tier1Skim.DataTier,
                                                'NODE' : tier1Skim.Node,
                                                'CMSSW' : tier1Skim.CMSSWVersion,
                                                'TWO_FILE_READ' : int(tier1Skim.TwoFileRead),
                                                'PROC_VER' : tier1Skim.ProcessingVersion,
                                                'SKIM_NAME' : tier1Skim.SkimName,
                                                'GLOBAL_TAG' : tier1Skim.GlobalTag,
                                                "CONFIG_URL" : tier1Skim.ConfigURL } )

            # output tiers actually produced by this reco configuration
            writeTiers = []
            if datasetConfig.Reco.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.Reco.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.Reco.WriteDQM:
                writeTiers.append("DQM")
            if len(datasetConfig.Reco.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            # only build a workflow if reco is enabled and produces output
            if datasetConfig.Reco.DoReco and len(writeTiers) > 0:

                #
                # create WMSpec
                #
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = getPromptRecoArguments()

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.Reco.CMSSWVersion

                specArguments['RunNumber'] = run

                specArguments['StdJobSplitArgs'] = {'events_per_job' : datasetConfig.Reco.EventSplit}

                specArguments['ProcessingString'] = "PromptReco"
                specArguments['ProcessingVersion'] = datasetConfig.Reco.ProcessingVersion
                specArguments['ProcScenario'] = datasetConfig.Scenario
                specArguments['GlobalTag'] = datasetConfig.Reco.GlobalTag

                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (dataset, runInfo['acq_era'], repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.Reco.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.Reco.DqmSequences

                specArguments['UnmergedLFNBase'] = "%s/t0temp/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])
                specArguments['MergedLFNBase'] = "%s/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])

                specArguments['OverrideCatalog'] = "trivialcatalog_file:/afs/cern.ch/cms/SITECONF/T0_CH_CERN/Tier0/override_catalog.xml?protocol=override"
                specArguments['ValidStatus'] = "VALID"

                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                wmSpec = promptrecoWorkload(workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                # NOTE(review): 'subscriptions' accumulates across all
                # runs/datasets, so every spec receives the full list —
                # presumably filtered downstream by primaryDataset; confirm
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                       { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                         'dn' : "*****@*****.**" } )

                wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                                  softTimeout = 604800, gracePeriod = 3600)

                wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    # single transaction for all inserts plus subscription creation;
    # rollback and re-raise on any failure so nothing is half-released
    try:
        myThread.transaction.begin()
        if len(bindsDatasetScenario) > 0:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
        if len(bindsCMSSWVersion) > 0:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
        if len(bindsRecoConfig) > 0:
            insertRecoConfigDAO.execute(bindsRecoConfig, conn = myThread.transaction.conn, transaction = True)
        if len(bindsStorageNode) > 0:
            insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
        if len(bindsPromptSkimConfig) > 0:
            insertPromptSkimConfigDAO.execute(bindsPromptSkimConfig, conn = myThread.transaction.conn, transaction = True)
        if len(bindsReleasePromptReco) > 0:
            releasePromptRecoDAO.execute(bindsReleasePromptReco, conn = myThread.transaction.conn, transaction = True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName), Fileset(id = fileset), alternativeFilesetClose = True)
            insertWorkflowMonitoringDAO.execute([fileset],  conn = myThread.transaction.conn, transaction = True)
        if len(recoSpecs) > 0:
            # list() keeps this correct under both py2 and py3 dict views
            markWorkflowsInjectedDAO.execute(list(recoSpecs.keys()), injected = True, conn = myThread.transaction.conn, transaction = True)
    except:
        myThread.transaction.rollback()
        raise
    else:
        myThread.transaction.commit()

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database.

    Create workflows, filesets and subscriptions for the processing of runs/streams.

    :param tier0Config: Tier0 configuration object (stream/dataset settings)
    :param run: run number being configured
    :param stream: stream name being configured
    :param specDirectory: directory used as WMBSHelper spec cache path
    :param dqmUploadProxy: proxy handed to the Express spec as 'DQMUploadProxy'
    :returns: None; all state changes go to the database in one transaction
    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    #
    if runInfo['hltkey'] != None:

        # streams not explicitely configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        updateStreamOverrideDAO = daoFactory(classname = "RunConfig.UpdateStreamOverride")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        # bind containers, filled below and flushed in one transaction;
        # empty dict/list means "nothing to insert" for that table
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsCMSSWVersion = []
        bindsStreamOverride = {}
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = { 'Express' : [], 'Bulk' : [] }

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles }

        elif streamConfig.ProcessingStyle == "Express":

            # every Express stream gets its own special dataset "Stream<name>"
            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'NODE' : expressPhEDExSubscribeNode,
                                        'CUSTODIAL' : 1,
                                        'REQ_ONLY' : "n",
                                        'PRIO' : "high" } )

            subscriptions['Express'].append( { 'custodialSites' : [],
                                               'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                               'autoApproveSites' : [expressPhEDExSubscribeNode],
                                               'priority' : "high",
                                               'primaryDataset' : specialDataset } )

            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    # PromptCalibProd skim needs an extra prompt calibration record
                    if "PromptCalibProd" in streamConfig.Express.AlcaSkims:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        # record a CMSSW version override for this stream, if configured
        # (also used below when filling specArguments['CMSSWVersion'])
        overrideVersion = streamConfig.VersionOverride.get(onlineVersion, None)
        if overrideVersion != None:
            bindsCMSSWVersion.append( { 'VERSION' : overrideVersion } )
            bindsStreamOverride = { "RUN" : run,
                                    "STREAM" : stream,
                                    "OVERRIDE" : overrideVersion }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                # NOTE(review): hardcoded run blacklist — "Unassigned path"
                # is skipped only for these Express runs and otherwise
                # processed like a normal dataset; confirm this is intended
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            # trigger path selection in "path:process" form
            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.CustodialNode != None:

                    custodialSites.append(datasetConfig.CustodialNode)

                    requestOnly = "y"
                    if datasetConfig.CustodialAutoApprove:
                        requestOnly = "n"
                        autoApproveSites.append(datasetConfig.CustodialNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.CustodialNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.CustodialNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : requestOnly,
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if datasetConfig.ArchivalNode != None:

                    # archival node is always custodial and auto-approved
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.ArchivalNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : "n",
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if len(custodialSites) + len(nonCustodialSites) > 0:
                    subscriptions['Bulk'].append( { 'custodialSites' : custodialSites,
                                                    'nonCustodialSites' : nonCustodialSites,
                                                    'autoApproveSites' : autoApproveSites,
                                                    'priority' : datasetConfig.CustodialPriority,
                                                    'primaryDataset' : dataset } )

            elif streamConfig.ProcessingStyle == "Express":

                # ALCARECO and DQM output modules were already added at the
                # stream level for the special dataset, so skip them here
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:
                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'NODE' : expressPhEDExSubscribeNode,
                                            'CUSTODIAL' : 1,
                                            'REQ_ONLY' : "n",
                                            'PRIO' : "high" } )

                subscriptions['Express'].append( { 'custodialSites' : [],
                                                   'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                                   'autoApproveSites' : [expressPhEDExSubscribeNode],
                                                   'priority' : "high",
                                                   'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        # NOTE(review): 'outputs' is never read below in this function —
        # confirm it is dead before removing
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = getRepackArguments()

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "%s/t0temp/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])
            specArguments['MergedLFNBase'] = "%s/%s" % (runInfo['lfn_prefix'], runInfo['bulk_data_type'])

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = getExpressArguments()

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['ProcScenario'] = streamConfig.Express.Scenario
            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['UnmergedLFNBase'] = "%s/t0temp/express" % runInfo['lfn_prefix']
            specArguments['MergedLFNBase'] = "%s/express" % runInfo['lfn_prefix']
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream

        # arguments common to both Repack and Express specs
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['CMSSWVersion'] = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/afs/cern.ch/cms/SITECONF/T0_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

        if streamConfig.ProcessingStyle == "Bulk":
            wmSpec = repackWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions['Bulk']:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            wmSpec = expressWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions['Express']:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                              softTimeout = 604800, gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        # single transaction covering all config inserts plus subscription
        # creation; rollback and re-raise on any failure
        try:
            myThread.transaction.begin()
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamOverride) > 0:
                updateStreamOverrideDAO.execute(bindsStreamOverride, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id],  conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                # record reco release delays per merged output fileset so
                # PromptReco can be released later for each dataset
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                # Express workflows are injected immediately
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except:
            myThread.transaction.rollback()
            raise
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass

    return