def assignRequest(requestName, teamName, prodMgr=None, wmstatUrl=None):
    """
    _assignRequest_

    Assign a request to a team.

    This does the following:
    - Changes the status to assigned
    - Creates an association to the team provided
    - Optionally associates the request to a prod mgr instance
    """
    factory = DBConnect.getConnection()
    reqId = requestID(requestName)
    teamId = factory(classname="Team.ID").execute(teamName)
    if teamId is None:
        msg = "Team named %s not known in database. " % teamName
        msg += "Failed to assign request %s to team %s" % (requestName, teamName)
        raise RuntimeError(msg)

    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateTeam(requestName, teamName)

    assigner = factory(classname="Assignment.New")
    assigner.execute(reqId, teamId)

    changeRequestStatus(requestName, 'assigned', priority=None, wmstatUrl=wmstatUrl)

    if prodMgr is not None:
        addPM = factory(classname="Progress.ProdMgr")
        addPM.execute(reqId, prodMgr)
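# --- Hedged usage sketch (added for illustration, not part of the original module).
# The request name, team, and WMStats URL below are hypothetical placeholders;
# assignRequest is the function defined above.
def _exampleAssignRequest():
    assignRequest("example_request_170101_000000_0000",
                  "production",
                  prodMgr=None,
                  wmstatUrl="https://couch.example.com/couchdb/wmstats")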
def setup(self, parameters):
    """
    Set up db connections (CouchDB, WMBS) to prepare for gathering information.
    """
    # set the connection to local queue
    if not hasattr(self.config, "Tier0Feeder"):
        self.localQueue = WorkQueueService(self.config.AnalyticsDataCollector.localQueueURL)

    # set the connection for local couchDB call
    self.localCouchDB = LocalCouchDBData(self.config.AnalyticsDataCollector.localCouchURL,
                                         self.config.JobStateMachine.summaryStatsDBName,
                                         self.summaryLevel)

    # interface to WMBS/BossAir db
    myThread = threading.currentThread()

    # set wmagent db data
    self.wmagentDB = WMAgentDBData(self.summaryLevel, myThread.dbi, myThread.logger)

    # set the connection for local couchDB call
    self.localSummaryCouchDB = WMStatsWriter(self.config.AnalyticsDataCollector.localWMStatsURL,
                                             appName="WMStatsAgent")

    if hasattr(self.config, "Tier0Feeder"):
        # use local db for Tier0
        centralRequestCouchDBURL = self.config.AnalyticsDataCollector.localT0RequestDBURL
    else:
        centralRequestCouchDBURL = self.config.AnalyticsDataCollector.centralRequestDBURL

    self.centralRequestCouchDB = RequestDBWriter(centralRequestCouchDBURL,
                                                 couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

    # TODO: change the config to hold couch url
    self.localCouchServer = CouchMonitor(self.config.JobStateMachine.couchurl)

    self.dbsBufferUtil = DBSBufferUtil()

    if self.pluginName is not None:
        pluginFactory = WMFactory("plugins", "WMComponent.AnalyticsDataCollector.Plugins")
        self.plugin = pluginFactory.loadObject(classname=self.pluginName)
def setup(self, parameters):
    """
    Called at startup
    """
    self.teamName = self.config.Agent.teamName
    # set the connection for local couchDB call
    self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
    self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
    self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, "WMStatsAgent")

    # TODO: we might need to use local db for Tier0
    self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                  couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

    if self.useReqMgrForCompletionCheck:
        self.deletableState = "announced"
        self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
        # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
        self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
    else:
        # Tier0 case
        self.deletableState = "completed"
        # use local for update
        self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

    jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
    jobDBName = self.config.JobStateMachine.couchDBName
    self.jobCouchdb = CouchServer(jobDBurl)
    self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
    self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
    statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
    self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)
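# --- Hedged configuration sketch (illustration only): the attribute names below are
# the ones read by setup() above; all values are hypothetical placeholders.
from WMCore.Configuration import Configuration

def _exampleTaskArchiverConfig():
    config = Configuration()
    config.section_("Agent")
    config.Agent.teamName = "production"
    config.section_("TaskArchiver")
    config.TaskArchiver.useReqMgrForCompletionCheck = True
    config.TaskArchiver.archiveDelayHours = 24
    config.TaskArchiver.localWMStatsURL = "http://localhost:5984/wmagent_summary"
    config.TaskArchiver.ReqMgr2ServiceURL = "https://reqmgr2.example.com/reqmgr2"
    config.TaskArchiver.ReqMgrServiceURL = "https://reqmgr.example.com/reqmgr"
    config.section_("AnalyticsDataCollector")
    config.AnalyticsDataCollector.centralRequestDBURL = "https://couch.example.com/reqmgr_workload_cache"
    config.AnalyticsDataCollector.RequestCouchApp = "ReqMgr"
    config.section_("JobStateMachine")
    config.JobStateMachine.couchurl = "http://localhost:5984"
    config.JobStateMachine.couchDBName = "wmagent_jobdump"
    config.JobStateMachine.summaryStatsDBName = "stat_summary"
    return config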
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state; also includes an optional
    priority change for the request.

    - *requestName* : name of the request to be modified
    - *newState* : name of the new status for the request
    - *priority* : optional integer priority

    When changing request state (on the assignment page), it's possible to
    change the priority in the same go, hence the priority argument.
    """
    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)

    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)
    return
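# --- Hedged usage sketch (illustration only, not in the original module).
# The request name and WMStats URL are hypothetical placeholders; the optional
# arguments show a status change combined with a priority update.
def _exampleChangeRequestStatus():
    changeRequestStatus("example_request_170101_000000_0000",
                        "assigned",
                        priority=100000,
                        wmstatUrl="https://couch.example.com/couchdb/wmstats")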
    for ds in helper.listOutputDatasets():
        if ds not in request['OutputDatasets']:
            request['OutputDatasets'].append(ds)

    # don't want to JSONify the whole workflow
    del metadata['WorkloadSpec']
    workloadUrl = helper.saveCouch(couchUrl, couchDB, metadata=metadata)
    request['RequestWorkflow'] = removePasswordFromUrl(workloadUrl)
    try:
        CheckIn.checkIn(request, reqSchema['RequestType'])
    except CheckIn.RequestCheckInError as ex:
        msg = ex._message
        raise HTTPError(400, "Error in Request check-in: %s" % msg)
    try:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.insertRequest(request)
    except Exception as ex:
        webApi.error("Could not update WMStats, reason: %s" % ex)
        raise HTTPError(400, "Creating request failed, could not update WMStats.")

    return request


def makeRequest(webApi, reqInputArgs, couchUrl, couchDB, wmstatUrl):
    """
    Handles the submission of requests.
    """
def __init__(self, config):
    """
    __init__

    Create all DAO objects that are used by this class.
    """
    WMConnectionBase.__init__(self, "WMCore.WMBS")
    myThread = threading.currentThread()
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)

    self.getOutputMapAction = self.daofactory(classname="Jobs.GetOutputMap")
    self.bulkAddToFilesetAction = self.daofactory(classname="Fileset.BulkAddByLFN")
    self.bulkParentageAction = self.daofactory(classname="Files.AddBulkParentage")
    self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
    self.getParentInfoAction = self.daofactory(classname="Files.GetParentInfo")
    self.setParentageByJob = self.daofactory(classname="Files.SetParentageByJob")
    self.setParentageByMergeJob = self.daofactory(classname="Files.SetParentageByMergeJob")
    self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
    self.setFileLocation = self.daofactory(classname="Files.SetLocationByLFN")
    self.setFileAddChecksum = self.daofactory(classname="Files.AddChecksumByLFN")
    self.addFileAction = self.daofactory(classname="Files.Add")
    self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput")
    self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk")
    self.getWorkflowSpec = self.daofactory(classname="Workflow.GetSpecAndNameFromTask")
    self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID")
    self.getFullJobInfo = self.daofactory(classname="Jobs.LoadForErrorHandler")
    self.getJobTaskNameAction = self.daofactory(classname="Jobs.GetFWJRTaskName")
    self.pnn_to_psn = self.daofactory(classname="Locations.GetPNNtoPSNMapping").execute()

    self.dbsStatusAction = self.dbsDaoFactory(classname="DBSBufferFiles.SetStatus")
    self.dbsParentStatusAction = self.dbsDaoFactory(classname="DBSBufferFiles.GetParentStatus")
    self.dbsChildrenAction = self.dbsDaoFactory(classname="DBSBufferFiles.GetChildren")
    self.dbsCreateFiles = self.dbsDaoFactory(classname="DBSBufferFiles.Add")
    self.dbsSetLocation = self.dbsDaoFactory(classname="DBSBufferFiles.SetLocationByLFN")
    self.dbsInsertLocation = self.dbsDaoFactory(classname="DBSBufferFiles.AddLocation")
    self.dbsSetChecksum = self.dbsDaoFactory(classname="DBSBufferFiles.AddChecksumByLFN")
    self.dbsSetRunLumi = self.dbsDaoFactory(classname="DBSBufferFiles.AddRunLumi")
    self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow")
    self.dbsLFNHeritage = self.dbsDaoFactory(classname="DBSBufferFiles.BulkHeritageParent")

    self.stateChanger = ChangeState(config)

    # Decide whether or not to attach jobReport to returned value
    self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False)

    # Store location for the specs for DBS
    self.specDir = getattr(config.JobAccountant, 'specDir', None)

    # maximum RAW EDM size for Repack output before data is put into the
    # Error dataset and skips PromptReco
    self.maxAllowedRepackOutputSize = getattr(config.JobAccountant, 'maxAllowedRepackOutputSize',
                                              12 * 1024 * 1024 * 1024)

    # ACDC service
    self.dataCollection = DataCollectionService(url=config.ACDC.couchurl,
                                                database=config.ACDC.database)

    jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url']
    jobDBName = config.JobStateMachine.couchDBName
    jobCouchdb = CouchServer(jobDBurl)
    self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
    self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL, appName="WMStatsAgent")

    # Hold data for later committal
    self.dbsFilesToCreate = []
    self.wmbsFilesToBuild = []
    self.wmbsMergeFilesToBuild = []
    self.fileLocation = None
    self.mergedOutputFiles = []
    self.listOfJobsToSave = []
    self.listOfJobsToFail = []
    self.filesetAssoc = []
    self.parentageBinds = []
    self.parentageBindsForMerge = []
    self.jobsWithSkippedFiles = {}
    self.count = 0
    self.datasetAlgoID = collections.deque(maxlen=1000)
    self.datasetAlgoPaths = collections.deque(maxlen=1000)
    self.dbsLocations = set()
    self.workflowIDs = collections.deque(maxlen=1000)
    self.workflowPaths = collections.deque(maxlen=1000)

    self.phedex = PhEDEx()
    self.locLists = self.phedex.getNodeMap()

    return
def __init__(self, rest, config):
    super(CleanUpTask, self).__init__(config)
    self.wmstatsDB = WMStatsWriter(config.wmstats_url,
                                   reqdbURL=config.reqmgrdb_url,
                                   reqdbCouchApp=config.reqdb_couch_app)
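# --- Hedged configuration sketch (illustration only): the three attribute names are
# the ones read by the constructor above; the URLs and couchapp name are
# hypothetical placeholders.
class _ExampleCleanUpTaskConfig(object):
    wmstats_url = "https://couch.example.com/couchdb/wmstats"
    reqmgrdb_url = "https://couch.example.com/couchdb/reqmgr_workload_cache"
    reqdb_couch_app = "ReqMgr"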
def updateRequestStatus(couchURL, requestList, status):
    ww = WMStatsWriter(couchURL)
    for request in requestList:
        ww.updateRequestStatus(request, status)
        print("%s is updated to %s" % (request, status))
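# --- Hedged usage sketch (illustration only): the couch URL and request name are
# hypothetical placeholders; the function defined above is applied to a small list.
def _exampleUpdateRequestStatus():
    updateRequestStatus("https://couch.example.com/couchdb/wmstats",
                        ["example_request_170101_000000_0000"],
                        "aborted")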
def __init__(self, config):
    """
    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    myThread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)

    self.config = config
    self.jobCacheDir = self.config.JobCreator.jobCacheDir

    if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
        # Get workqueue setup from config unless overridden
        if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
            self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
        else:
            from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
            self.workQueue = queueFromConfig(self.config)
    else:
        self.workQueue = None

    self.maxProcessSize = getattr(self.config.TaskArchiver, 'maxProcessSize', 250)
    self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
    self.nOffenders = getattr(self.config.TaskArchiver, 'nOffenders', 3)
    self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
    self.uploadPublishInfo = getattr(self.config.TaskArchiver, 'uploadPublishInfo', False)
    self.uploadPublishDir = getattr(self.config.TaskArchiver, 'uploadPublishDir', None)
    self.userFileCacheURL = getattr(self.config.TaskArchiver, 'userFileCacheURL', None)

    # Set up optional histograms
    self.histogramKeys = getattr(self.config.TaskArchiver, "histogramKeys", [])
    self.histogramBins = getattr(self.config.TaskArchiver, "histogramBins", 10)
    self.histogramLimit = getattr(self.config.TaskArchiver, "histogramLimit", 5.0)

    if not self.useReqMgrForCompletionCheck:
        # sets the local monitor summary couch db
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBWriter = self.wmstatsCouchDB
    else:
        self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)

    # Start a couch server for getting job info
    # from the FWJRs for committal to archive
    try:
        workDBName = getattr(self.config.TaskArchiver, 'workloadSummaryCouchDBName',
                             'workloadsummary')
        workDBurl = getattr(self.config.TaskArchiver, 'workloadSummaryCouchURL')
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.workCouchdb = CouchServer(workDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        self.workdatabase = self.workCouchdb.connectDatabase(workDBName)
        logging.debug("Using url %s/%s for job" % (jobDBurl, jobDBName))
        logging.debug("Writing to %s/%s for workloadSummary" % (sanitizeURL(workDBurl)['url'],
                                                                workDBName))
        self.requireCouch = getattr(self.config.TaskArchiver, 'requireCouch', False)
    except Exception as ex:
        msg = "Error in connecting to couch.\n"
        msg += str(ex)
        logging.error(msg)
        self.jobsdatabase = None
        self.fwjrdatabase = None
        if getattr(self.config.TaskArchiver, 'requireCouch', False):
            raise TaskArchiverPollerException(msg)
import os
import sys
from optparse import OptionParser

from WMCore.Services.WMStats.WMStatsWriter import WMStatsWriter
from WMCore.Configuration import loadConfigurationFile

if __name__ == "__main__":
    if "WMAGENT_CONFIG" not in os.environ:
        print("The WMAGENT_CONFIG environment variable needs to be set before this can run")
        sys.exit(1)

    wmagentConfig = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    if hasattr(wmagentConfig, "AnalyticsDataCollector") and hasattr(wmagentConfig.AnalyticsDataCollector,
                                                                    "centralWMStatsURL"):
        wmstats = WMStatsWriter(wmagentConfig.AnalyticsDataCollector.centralWMStatsURL)
    else:
        print("AnalyticsDataCollector.centralWMStatsURL is not specified")
        sys.exit(1)

    parser = OptionParser()
    parser.set_usage("wmstats-request-status-change [agent_url:port]")
    parser.add_option("-r", "--request", dest="request", help="request name")
    parser.add_option("-s", "--status", dest="newstatus", help="set to new status")
    (options, args) = parser.parse_args()
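# --- Hedged usage sketch (illustration only): invoking the script above with the two
# options it defines; the config path and request name are hypothetical placeholders.
#   export WMAGENT_CONFIG=/path/to/wmagent_config.py
#   python wmstats-request-status-change -r example_request_170101_000000_0000 -s aborted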
def buildWorkloadAndCheckIn(webApi, reqSchema, couchUrl, couchDB, wmstatUrl, clone=False):
    """
    If clone is True, the function is called on a cloned request; in that case
    reqSchema shall not be modified and shall be checked in as is.
    """
    try:
        request = buildWorkloadForRequest(typename=reqSchema["RequestType"], schema=reqSchema)
    except WMSpecFactoryException as ex:
        logging.error(traceback.format_exc())
        raise HTTPError(400, "Error in Workload Validation: %s" % ex.message())

    helper = WMWorkloadHelper(request['WorkloadSpec'])

    # update request as well for wmstats update
    # there is a better way to do this (passing helper to request, but make sure all the information is there)
    request["Campaign"] = helper.getCampaign()

    # Add the output datasets if necessary
    # for some bizarre reason OutputDatasets is a list of lists when cloning, e.g.
    # [['/MinimumBias/WMAgentCommissioning10-v2/RECO'], ['/MinimumBias/WMAgentCommissioning10-v2/ALCARECO']]
    # #3743
    if not clone:
        for ds in helper.listOutputDatasets():
            if ds not in request['OutputDatasets']:
                request['OutputDatasets'].append(ds)
    # TODO: need to update output dataset by Task for task chain requests

    # can't save the Request object directly, because it makes it hard to retrieve the _rev
    metadata = {}
    metadata.update(request)

    # don't want to JSONify the whole workflow
    del metadata['WorkloadSpec']
    workloadUrl = helper.saveCouch(couchUrl, couchDB, metadata=metadata)
    request['RequestWorkflow'] = removePasswordFromUrl(workloadUrl)
    try:
        CheckIn.checkIn(request, reqSchema['RequestType'])
    except CheckIn.RequestCheckInError as ex:
        raise HTTPError(400, "Error in Request check-in: %s" % str(ex))

    # Inconsistent request parameters between Oracle and Couch (#4380, #4388)
    # metadata above is what is saved into couch to represent a request document.
    # A number of request arguments on the corresponding couch document are not
    # set and keep default null/None values; update those accordingly now.
    # It's a mess to have two mutually inconsistent database backends.
    # Not easy to handle this earlier since couch is stored first and
    # some parameters are worked out later when storing into Oracle.
    reqDetails = requestDetails(request["RequestName"])
    # couchdb request parameters which are null at the injection time and remain so
    paramsToUpdate = ["RequestStatus",
                      "RequestSizeFiles",
                      "AcquisitionEra",
                      "RequestWorkflow",
                      "RequestType",
                      "RequestPriority",
                      "Requestor",
                      "Group",
                      "SizePerEvent",
                      "PrepID",
                      "RequestNumEvents",
                      "ProcessingString",
                      "ProcessingVersion",
                      ]

    couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
    fields = {}
    for key in paramsToUpdate:
        fields[key] = reqDetails[key]
    couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                           fields=fields, useBody=True)

    try:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.insertRequest(request)
    except Exception as ex:
        webApi.error("Could not update WMStats, reason: %s" % ex)
        raise HTTPError(400, "Creating request failed, could not update WMStats.")

    return request
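# --- Hedged usage sketch (illustration only): the couch URLs and database name are
# hypothetical placeholders; webApi stands for the REST handler object (with an
# error() logger) and reqSchema for the request schema dictionary used above.
def _exampleBuildWorkloadAndCheckIn(webApi, reqSchema):
    return buildWorkloadAndCheckIn(webApi, reqSchema,
                                   couchUrl="https://couch.example.com/couchdb",
                                   couchDB="reqmgr_workload_cache",
                                   wmstatUrl="https://couch.example.com/couchdb/wmstats",
                                   clone=False)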
def __init__(self, rest, config):
    super(HeartbeatMonitorBase, self).__init__(config)
    self.centralWMStats = WMStatsWriter(config.wmstats_url)
    self.threadList = config.thread_list
def processInboundWork(self, inbound_work=None, throw=False):
    """Retrieve work from inbox, split and store

    If a request is passed then only process that request.
    """
    if self.params['LocalQueueFlag']:
        self.backend.fixConflicts()  # db should be consistent

    result = []
    if not inbound_work:
        inbound_work = self.backend.getElementsForSplitting()
    for inbound in inbound_work:
        # Check we haven't already split the work
        work = self.backend.getElementsForParent(inbound)
        try:
            if work:
                self.logger.info('Request "%s" already split - Resuming' % inbound['RequestName'])
            else:
                work, totalStats = self._splitWork(inbound['WMSpec'], None,
                                                   inbound['Inputs'], inbound['Mask'])
                self.backend.insertElements(work, parent=inbound)  # if this fails, rerunning will pick up here

                # save inbound work to signal we have completed queueing
                # add the total work to the wmstats summary
                self.backend.updateInboxElements(inbound.id, Status='Acquired')

                if not self.params.get('LocalQueueFlag') and self.params.get('WMStatsCouchUrl'):
                    # only update global stats for global queue
                    try:
                        wmstatSvc = WMStatsWriter(self.params.get('WMStatsCouchUrl'))
                        wmstatSvc.insertTotalStats(inbound['WMSpec'].name(), totalStats)
                    except Exception as ex:
                        self.logger.info('Error publishing %s to WMStats: %s' %
                                         (inbound['RequestName'], str(ex)))
        except TERMINAL_EXCEPTIONS as ex:
            self.logger.info('Failing workflow "%s": %s' % (inbound['RequestName'], str(ex)))
            self.backend.updateInboxElements(inbound.id, Status='Failed')
            if throw:
                raise
        except Exception as ex:
            # if the request has been failing for too long, permanently fail it.
            # last update time was when the element was assigned to this queue
            if (float(inbound.updatetime) + self.params['QueueRetryTime']) < time.time():
                self.logger.info('Failing workflow "%s" as not queued in %d secs: %s' %
                                 (inbound['RequestName'], self.params['QueueRetryTime'], str(ex)))
                self.backend.updateInboxElements(inbound.id, Status='Failed')
            else:
                self.logger.info('Exception splitting work for wmspec "%s": %s' %
                                 (inbound['RequestName'], str(ex)))
            if throw:
                raise
            continue
def __init__(self, rest, config):
    CherryPyPeriodicTask.__init__(self, config)
    self.wmstatsDB = WMStatsWriter(config.wmstats_url,
                                   reqdbURL=config.reqmgrdb_url,
                                   reqdbCouchApp=config.reqdb_couch_app)