def printTaskInfo(self, crabDBInfo, username):
    """ Print general information like project directory, task name, scheduler,
        task status (in the database), dashboard URL, warnings and failure
        messages in the database.
    """
    schedd = getColumn(crabDBInfo, 'tm_schedd')
    status = getColumn(crabDBInfo, 'tm_task_status')
    command = getColumn(crabDBInfo, 'tm_task_command')
    warnings = literal_eval(getColumn(crabDBInfo, 'tm_task_warnings'))
    failure = getColumn(crabDBInfo, 'tm_task_failure')

    self.logger.info("CRAB project directory:\t\t%s" % (self.requestarea))
    self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
    if schedd:
        msg = "Grid scheduler:\t\t\t%s" % schedd
        self.logger.info(msg)
    msg = "Status on the CRAB server:\t"
    if 'FAILED' in status:
        msg += "%s%s%s" % (colors.RED, status, colors.NORMAL)
    else:
        if status in TASKDBSTATUSES_TMP:
            msg += "%s on command %s" % (status, command)
        else:
            msg += "%s" % (status)
    self.logger.info(msg)

    # Show server and dashboard URL for the task.
    taskname = urllib.quote(self.cachedinfo['RequestName'])

    ## CRAB Server UI URL for this task is always useful
    crabServerUIURL = "https://cmsweb.cern.ch/crabserver/ui/task/" + taskname
    msg = "%sTask URL to use for HELP:\t%s%s" % (colors.GREEN, crabServerUIURL, colors.NORMAL)
    self.logger.info(msg)

    ## Dashboard monitoring URL only makes sense if submitted to schedd
    if schedd:
        dashboardURL = "http://dashb-cms-job.cern.ch/dashboard/templates/task-analysis/#user=" + username \
                       + "&refresh=0&table=Jobs&p=1&records=25&activemenu=2&status=&site=&tid=" + taskname
        self.logger.info("Dashboard monitoring URL:\t%s" % (dashboardURL))

    # Print the warning messages (these are the warnings in the Tasks DB,
    # and/or maybe some warning added by the REST Interface to the status result).
    if warnings:
        for warningMsg in warnings:
            self.logger.warning("%sWarning%s:\t\t\t%s" % (colors.RED, colors.NORMAL, warningMsg))
    if failure:  #TODO failure should be ignored if the task is not in failure state in the task db
        msg = "%sFailure message from the server%s:" % (colors.RED, colors.NORMAL)
        msg += "\t\t%s" % (failure.replace('\n', '\n\t\t\t\t'))
        self.logger.error(msg)
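# Hedged sketch (assumption, not part of this file): `getColumn` is taken to read a
# single column out of the REST 'search' response, which is assumed to carry parallel
# 'desc'/'result' arrays. The stand-in below and its sample data are illustrative only.
def _getColumnSketch(crabDBInfo, columnName):
    """Illustrative stand-in for getColumn: look up a column value by name."""
    columnIndex = crabDBInfo['desc']['columns'].index(columnName)
    value = crabDBInfo['result'][columnIndex]
    return None if value == 'None' else value  # assumption: the DB encodes NULL as the string 'None'

# e.g. (hostname invented for illustration):
#   _getColumnSketch({'desc': {'columns': ['tm_schedd']}, 'result': ['vocms059.cern.ch']},
#                    'tm_schedd')  ->  'vocms059.cern.ch'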
def __call__(self):
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

    crabDBInfo, jobList = getMutedStatusInfo(self.logger)
    if not jobList:
        msg = "%sError%s:" % (colors.RED, colors.NORMAL)
        msg += " Status information is unavailable, will not proceed with the resubmission."
        msg += " Try again a few minutes later if the task has just been submitted."
        self.logger.info(msg)
        return None

    publicationEnabled = getColumn(crabDBInfo, "tm_publication")
    jobsPerStatus = jobList['jobsPerStatus']

    if self.options.publication:
        if publicationEnabled == "F":
            msg = "Publication was disabled for this task. Therefore, "
            msg += "there are no publications to resubmit."
            self.logger.info(msg)
            return None
        else:
            if "finished" not in jobsPerStatus:
                msg = "No files found to publish"
                self.logger.info(msg)
                return None

    self.jobids = self.processJobIds(jobList)

    configreq = self.getQueryParams()
    self.logger.info("Sending resubmit request to the server.")
    self.logger.debug("Submitting %s " % str(configreq))
    configreq_encoded = self._encodeRequest(configreq)
    self.logger.debug("Encoded resubmit request: %s" % (configreq_encoded))

    dictresult, _, _ = server.post(self.uri, data=configreq_encoded)
    self.logger.debug("Result: %s" % (dictresult))
    self.logger.info("Resubmit request sent to the server.")
    if dictresult['result'][0]['result'] != 'ok':
        msg = "Server responded with: '%s'" % (dictresult['result'][0]['result'])
        self.logger.info(msg)
        returndict = {'status': 'FAILED'}
    else:
        if not self.options.wait:
            msg = "Please use 'crab status' to check how the resubmission process proceeds."
            msg += "\nNotice it may take a couple of minutes for the resubmission to get fully processed."
            self.logger.info(msg)
        else:
            targetTaskStatus = 'SUBMITTED'
            checkStatusLoop(self.logger, server, self.uri, self.cachedinfo['RequestName'], targetTaskStatus, self.name)
        returndict = {'status': 'SUCCESS'}

    return returndict
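# Hedged note on the success check above: the server reply is assumed to look like
# {'result': [{'result': 'ok'}]} when the resubmission is accepted; any other value
# in that slot is reported back to the user and the command returns status FAILED.
# Both payloads below are invented for illustration only:
#   {'result': [{'result': 'ok'}]}              -> returndict = {'status': 'SUCCESS'}
#   {'result': [{'result': 'Error: ...'}]}      -> returndict = {'status': 'FAILED'}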
def printDAGStatus(self, crabDBInfo, statusCacheInfo):
    # Get dag status from the node_state/job_log summary
    dagman_codes = {1: 'SUBMITTED', 2: 'SUBMITTED', 3: 'SUBMITTED',
                    4: 'SUBMITTED', 5: 'COMPLETED', 6: 'FAILED'}
    dagStatus = dagman_codes.get(statusCacheInfo['DagStatus']['DagStatus'])
    # Unfortunately the DAG code for a killed task is 6, just like for finished DAGs
    # with failed jobs. Relabel the status from 'FAILED' to 'FAILED (KILLED)' if a
    # successful kill command was issued.
    dbstatus = getColumn(crabDBInfo, 'tm_task_status')
    if dagStatus == 'FAILED' and dbstatus == 'KILLED':
        dagStatus = 'FAILED (KILLED)'

    msg = "Status on the scheduler:\t" + dagStatus
    self.logger.info(msg)
    return dagStatus
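# Illustrative note on the mapping above: `dict.get` without a default returns None
# for any DAG status code outside 1-6, so an unknown code would propagate as
# dagStatus = None (and the string concatenation below it would then raise).
#   dagman_codes.get(5)   ->  'COMPLETED'
#   dagman_codes.get(99)  ->  None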
def validateOptions(self):
    """ Check that the sitelist parameters are comma-separated lists of CMS site
        names, and put the strings to be passed to the server on self.
    """
    SubCommand.validateOptions(self)

    serverFactory = CRABClient.Emulator.getEmulator('rest')
    self.server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    uri = getUrl(self.instance, resource='task')
    crabDBInfo, _, _ = self.server.get(uri, data={'subresource': 'search', 'workflow': self.cachedinfo['RequestName']})
    self.splitting = getColumn(crabDBInfo, 'tm_split_algo')

    if self.options.publication:
        if self.options.sitewhitelist is not None or self.options.siteblacklist is not None or \
           self.options.maxjobruntime is not None or self.options.maxmemory is not None or \
           self.options.numcores is not None or self.options.priority is not None:
            msg = "The options --sitewhitelist, --siteblacklist,"
            msg += " --maxjobruntime, --maxmemory, --numcores and --priority"
            msg += " can not be specified together with the option --publication."
            msg += " The last option is to only resubmit (failed) publications,"
            msg += " in which case all of the first options make no sense."
            raise ConfigurationException(msg)
        if self.options.jobids:
            msg = "The option --jobids"
            msg += " can not be specified together with the option --publication."
            msg += " The last option is to only resubmit (failed) publications,"
            msg += " which does not yet allow filtering on job ids (ALL failed publications will be resubmitted)."
            raise ConfigurationException(msg)
        if self.options.force:
            msg = "The option --force"
            msg += " can not be specified together with the option --publication."
            msg += " The last option is to only resubmit failed publications."
            msg += " Publications in a status other than 'failed' can not be resubmitted."
            raise ConfigurationException(msg)

    ## The --jobids option indicates which jobs have to be resubmitted. If it is not
    ## given, then all jobs in the task that are not running or successfully
    ## completed are resubmitted. If the user provides a list of job ids, then also
    ## successfully completed jobs can be resubmitted.

    ## Check the format of the jobids option.
    if self.options.jobids:
        jobidstuple = validateJobids(self.options.jobids, self.splitting != 'Automatic')
        self.jobids = [str(jobid) for (_, jobid) in jobidstuple]

    ## The --force option should not be accepted unless combined with a user-given
    ## list of job ids via --jobids.
    if self.options.force and not self.jobids:
        msg = "Option --force can only be used in combination with option --jobids."
        raise ConfigurationException(msg)

    ## Convention used for the job parameters that the user can set when doing job
    ## resubmission (i.e. siteblacklist, sitewhitelist, maxjobruntime, maxmemory,
    ## numcores and priority):
    ## - If the user doesn't set a parameter, we don't pass it to the server, and
    ##   the server copies the original value the parameter had at task submission.
    ##   It copies it from the Task DB. Therefore we need to keep these parameters
    ##   in separate columns of the Task DB containing their original values.
    ## - For the site black- and whitelists, if the user passes an empty string,
    ##   e.g. --siteblacklist='', we pass to the server siteblacklist=empty and the
    ##   server interprets this as an empty list ([]). If the user passes a given
    ##   list of sites, this new list overwrites the original one.
    ## - The values of the parameters are used only for the resubmitted jobs (for
    ##   their first resubmission and all subsequent automatic resubmissions).
    # Check that the sites provided by the user are valid CMS site names. We do this
    # because, with only the server-side error handling, we get:
    #   Server answered with: Invalid input parameter
    #   Reason is: Incorrect 'siteblacklist' parameter
    # which is not really user friendly.
    # Moreover, I prefer to be independent from Lexicon. I'll use the regex here.
    sn_re = "^T[1-3]_[A-Z]{2}(_[A-Za-z0-9]+)+$"  # sn_re => SiteName_RegularExpression
    sn_rec = re.compile(sn_re)  # sn_rec => SiteName_RegularExpressionCompiled
    for sitelist in ['sitewhitelist', 'siteblacklist']:
        if getattr(self.options, sitelist) is not None:
            if getattr(self.options, sitelist) != "":
                for site_name in getattr(self.options, sitelist).split(','):
                    if '*' not in site_name and not sn_rec.match(site_name):
                        msg = "The site name '%s' does not look like a valid CMS site name" % (site_name)
                        msg += " (it does not match the regular expression '%s')." % (sn_re)
                        raise ConfigurationException(msg)
                setattr(self, sitelist, getattr(self.options, sitelist).split(','))
            else:
                setattr(self, sitelist, [])

    ## Sanity checks for task sizes. Limits are purposely fairly generous to provide
    ## some level of future-proofing. The server may restrict further.
    if self.options.maxjobruntime is not None:
        if self.options.maxjobruntime < 60 or self.options.maxjobruntime > 336 * 60:
            msg = "The requested maximum job runtime (%d minutes) must be between 60 and 20160 minutes." % (self.options.maxjobruntime)
            raise ConfigurationException(msg)

    if self.options.maxmemory is not None:
        if self.options.maxmemory < 30 or self.options.maxmemory > 1024 * 30:
            msg = "The requested per-job memory (%d MB) must be between 30 and 30720 MB." % (self.options.maxmemory)
            raise ConfigurationException(msg)

    if self.options.numcores is not None:
        if self.options.numcores < 1 or self.options.numcores > 128:
            msg = "The requested number of cores (%d) must be between 1 and 128." % (self.options.numcores)
            raise ConfigurationException(msg)

    if self.options.priority is not None:
        if self.options.priority < 1:
            msg = "The requested priority (%d) must be greater than 0." % (self.options.priority)
            raise ConfigurationException(msg)
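# Hedged sketch of how the site-name regex above behaves on a few sample names.
# The helper `_looksLikeValidSiteName` and the sample names are illustrative only,
# not part of the client.
import re as _re

def _looksLikeValidSiteName(site_name, pattern="^T[1-3]_[A-Z]{2}(_[A-Za-z0-9]+)+$"):
    """Return True if site_name matches the CMS site-name pattern or contains a wildcard."""
    return '*' in site_name or bool(_re.match(pattern, site_name))

# Expected behaviour (illustrative):
#   _looksLikeValidSiteName('T2_CH_CERN')  ->  True
#   _looksLikeValidSiteName('T2_*')        ->  True  (wildcards are passed through)
#   _looksLikeValidSiteName('CERN')        ->  False (no tier/country prefix)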
def printPublication(self, publicationEnabled, jobsPerStatus, asourl, asodb, taskname, user, crabDBInfo):
    """ Print information about the publication of the output files in DBS.
    """
    # Collecting publication information
    pubStatus = {}
    if publicationEnabled and 'finished' in jobsPerStatus:
        # Default asodb to 'asynctransfer': for old tasks this column is empty!
        asodb = asodb or 'asynctransfer'
        pubStatus = self.publicationStatus(taskname, asourl, asodb, user)
    elif not publicationEnabled:
        pubStatus['status'] = {'disabled': []}
    pubInfo = {}
    pubInfo['publication'] = pubStatus.get('status', {})
    pubInfo['publicationFailures'] = pubStatus.get('failure_reasons', {})

    ## The output datasets are written into the Task DB by the post-job
    ## when uploading the output files metadata.
    outdatasets = literal_eval(getColumn(crabDBInfo, 'tm_output_dataset') if getColumn(crabDBInfo, 'tm_output_dataset') else 'None')

    pubInfo['outdatasets'] = outdatasets
    pubInfo['jobsPerStatus'] = jobsPerStatus

    if 'publication' not in pubInfo:
        return pubStatus

    ## If publication was disabled, print a pertinent message and return.
    if 'disabled' in pubInfo['publication']:
        msg = "\nNo publication information (publication has been disabled in the CRAB configuration file)"
        self.logger.info(msg)
        return pubStatus

    ## List of output datasets that are going to be (or are already) published. This
    ## list is written into the Tasks DB by the post-job when it does the upload of
    ## the output files metadata. This means that the list will be empty until one
    ## of the post-jobs finishes executing.
    outputDatasets = pubInfo.get('outdatasets')

    ## If publication information is not available yet, print a pertinent message
    ## (print first the list of output datasets, without the DAS URL) and return.
    if not pubInfo['publication']:
        self.printOutputDatasets(outputDatasets)
        msg = "\nNo publication information available yet"
        self.logger.info(msg)
        return pubStatus

    ## Case in which there was an error in retrieving the publication status.
    if 'error' in pubInfo['publication']:
        msg = "\nPublication status:\t\t%s" % (pubInfo['publication']['error'])
        self.logger.info(msg)
        ## Print the output datasets with the corresponding DAS URL.
        self.printOutputDatasets(outputDatasets, includeDASURL=True)
        return pubStatus

    if pubInfo['publication'] and outputDatasets:
        states = pubInfo['publication']
        ## Don't consider publication states for which 0 files are in this state.
        states_tmp = states.copy()
        for status in states:
            if states[status] == 0:
                del states_tmp[status]
        states = states_tmp.copy()
        ## Count the total number of files to publish. For this we count the number of
        ## jobs and the number of files to publish per job (which is equal to the number
        ## of output datasets produced by the task, because, when multiple EDM files are
        ## produced, each EDM file goes into a different output dataset).
        numJobs = sum(pubInfo['jobsPerStatus'].values())
        numOutputDatasets = len(outputDatasets)
        numFilesToPublish = numJobs * numOutputDatasets
        ## Count how many of these files have already started the publication process.
        numSubmittedFiles = sum(states.values())
        ## Subtract the above two numbers to obtain how many files have not yet been
        ## considered for publication.
        states['unsubmitted'] = numFilesToPublish - numSubmittedFiles
        ## Print the publication status.
        statesList = sorted(states)
        msg = "\nPublication status:\t\t{0} {1}".format(self._printState(statesList[0], 13), \
                  self._percentageString(statesList[0], states[statesList[0]], numFilesToPublish))
        for status in statesList[1:]:
            if states[status]:
                msg += "\n\t\t\t\t{0} {1}".format(self._printState(status, 13), \
                          self._percentageString(status, states[status], numFilesToPublish))
        self.logger.info(msg)

        ## Print the publication errors.
        if pubInfo.get('publicationFailures'):
            msg = "\nPublication error summary:"
            if 'error' in pubInfo['publicationFailures']:
                msg += "\t%s" % (pubInfo['publicationFailures']['error'])
            elif pubInfo['publicationFailures'].get('result'):
                ndigits = int(math.ceil(math.log(numFilesToPublish + 1, 10)))
                for failureReason, numFailedFiles in pubInfo['publicationFailures']['result']:
                    msg += ("\n\n\t%" + str(ndigits) + "s files failed to publish with the following error message:\n\n\t\t%s") % (numFailedFiles, failureReason)
            self.logger.info(msg)

        ## Print the output datasets with the corresponding DAS URL.
        self.printOutputDatasets(outputDatasets, includeDASURL=True)

    return pubStatus
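# Hedged worked example of the publication accounting above (all numbers invented):
# with 100 jobs and 2 output datasets the task has 100 * 2 = 200 files to publish;
# if the states report 150 done + 30 failed = 180 submitted files, then
# states['unsubmitted'] = 200 - 180 = 20. A self-contained stand-in:
def _unsubmittedCountSketch(jobsPerStatus, outputDatasets, states):
    """Illustrative stand-in for the file accounting done in printPublication."""
    numFilesToPublish = sum(jobsPerStatus.values()) * len(outputDatasets)
    return numFilesToPublish - sum(states.values())

# e.g. _unsubmittedCountSketch({'finished': 100}, ['/A/B/USER', '/A/C/USER'],
#                              {'done': 150, 'failed': 30})  ->  20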
def __call__(self):
    # Get all of the columns from the database for a certain task
    taskname = self.cachedinfo['RequestName']
    uri = self.getUrl(self.instance, resource='task')
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    crabDBInfo, _, _ = server.get(uri, data={'subresource': 'search', 'workflow': taskname})
    self.logger.debug("Got information from server oracle database: %s", crabDBInfo)

    # Until the task lands on a schedd we'll show the status from the DB
    combinedStatus = getColumn(crabDBInfo, 'tm_task_status')

    user = getColumn(crabDBInfo, 'tm_username')
    webdir = getColumn(crabDBInfo, 'tm_user_webdir')
    rootDagId = getColumn(crabDBInfo, 'clusterid')  # that's the condor id from the TW
    asourl = getColumn(crabDBInfo, 'tm_asourl')
    asodb = getColumn(crabDBInfo, 'tm_asodb')
    publicationEnabled = True if getColumn(crabDBInfo, 'tm_publication') == 'T' else False

    # Print information from the database
    self.printTaskInfo(crabDBInfo, user)
    if not rootDagId:
        failureMsg = "The task has not been submitted to the Grid scheduler yet. Not printing job information."
        self.logger.debug(failureMsg)
        return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg)

    self.logger.debug("The CRAB server submitted your task to the Grid scheduler (cluster ID: %s)" % rootDagId)

    if not webdir:
        # Query condor through the server for information about this task
        uri = self.getUrl(self.instance, resource='workflow')
        params = {'subresource': 'taskads', 'workflow': taskname}

        res = server.get(uri, data=params)[0]['result'][0]
        # JobStatus 5 = Held
        if res['JobStatus'] == '5' and 'DagmanHoldReason' in res:
            # If we didn't find a webdir in the DB and the DAG is held,
            # the task bootstrapping failed before or during the webdir
            # upload and the reason should be printed.
            failureMsg = "The task failed to bootstrap on the Grid scheduler."
            failureMsg += " Please send an e-mail to %s." % (FEEDBACKMAIL)
            failureMsg += "\nHold reason: %s" % (res['DagmanHoldReason'])
            self.logger.info(failureMsg)
            combinedStatus = "FAILED"
        else:
            # If the DAG is submitted and the webdir is not there yet, we have to wait
            # for AdjustSites to run and upload the webdir location to the server.
            self.logger.info("Waiting for the Grid scheduler to bootstrap your task")
            failureMsg = "Schedd has not reported back the webdir (yet)"
            self.logger.debug(failureMsg)
            combinedStatus = "UNKNOWN"
        return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg)

    self.logger.debug("Webdir is located at %s", webdir)

    proxiedWebDir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
    if not proxiedWebDir:
        msg = "Failed to get the proxied webdir from CRABServer. "
        msg += "\nWill fall back to the regular webdir url for file downloads "
        msg += "but will likely fail if the client is located outside CERN."
        self.logger.debug(msg)
        proxiedWebDir = webdir
    self.logger.debug("Proxied webdir is located at %s", proxiedWebDir)

    # Download status_cache file
    url = proxiedWebDir + "/status_cache"
    self.logger.debug("Retrieving 'status_cache' file from %s", url)

    statusCacheInfo = None
    try:
        statusCacheData = getDataFromURL(url, self.proxyfilename)
    except HTTPException as ce:
        self.logger.info("Waiting for the Grid scheduler to report back the status of your task")
        failureMsg = "Cannot retrieve the status_cache file. Maybe the task process has not run yet?"
failureMsg += " Got:\n%s" % ce self.logger.error(failureMsg) logging.getLogger("CRAB3").exception(ce) combinedStatus = "UNKNOWN" return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg) else: # We skip first two lines of the file because they contain the checkpoint locations # for the job_log / fjr_parse_results files and are used by the status caching script. # Load the job_report summary statusCacheInfo = literal_eval(statusCacheData.split('\n')[2]) self.logger.debug("Got information from status cache file: %s", statusCacheInfo) # If the task is already on the grid, show the dagman status combinedStatus = dagStatus = self.printDAGStatus( crabDBInfo, statusCacheInfo) shortResult = self.printShort(statusCacheInfo) pubStatus = self.printPublication(publicationEnabled, shortResult['jobsPerStatus'], asourl, asodb, taskname, user, crabDBInfo) self.printErrors(statusCacheInfo) if self.options.summary: self.printSummary(statusCacheInfo) if self.options.long or self.options.sort: # If user correctly passed some jobid CSVs to use in the status --long, self.jobids # will be a list of strings already parsed from the input by the validateOptions() if self.jobids: self.checkUserJobids(statusCacheInfo, self.jobids) sortdict = self.printLong(statusCacheInfo, self.jobids, quiet=(not self.options.long)) if self.options.sort: self.printSort(sortdict, self.options.sort) if self.options.json: self.logger.info(json.dumps(statusCacheInfo)) statusDict = self.makeStatusReturnDict(crabDBInfo, combinedStatus, dagStatus, '', shortResult, statusCacheInfo, pubStatus, proxiedWebDir) return statusDict
def makeStatusReturnDict(self, crabDBInfo, combinedStatus, dagStatus='',
                         statusFailureMsg='', shortResult={},
                         statusCacheInfo={}, pubStatus={}, proxiedWebDir=''):
    """ Create a dictionary which is mostly identical to the dictionary that was
        being returned by the old status (plus a few other keys needed by the
        other client commands). This is to ensure backward compatibility after
        the status2 transition for users relying on this dictionary in their
        scripts.
    """
    statusDict = {}
    statusDict['status'] = combinedStatus
    statusDict['dbStatus'] = getColumn(crabDBInfo, 'tm_task_status')
    statusDict['dagStatus'] = dagStatus
    statusDict['username'] = getColumn(crabDBInfo, 'tm_username')
    statusDict['taskFailureMsg'] = getColumn(crabDBInfo, 'tm_task_failure')
    statusDict['taskWarningMsg'] = getColumn(crabDBInfo, 'tm_task_warnings')
    statusDict['outdatasets'] = getColumn(crabDBInfo, 'tm_output_dataset')
    statusDict['schedd'] = getColumn(crabDBInfo, 'tm_schedd')
    statusDict['collector'] = getColumn(crabDBInfo, 'tm_collector')
    statusDict['ASOURL'] = getColumn(crabDBInfo, 'tm_asourl')
    statusDict['command'] = getColumn(crabDBInfo, 'tm_task_command')
    statusDict['publicationEnabled'] = True if getColumn(crabDBInfo, 'tm_publication') == 'T' else False
    statusDict['userWebDirURL'] = getColumn(crabDBInfo, 'tm_user_webdir')
    statusDict['inputDataset'] = getColumn(crabDBInfo, 'tm_input_dataset')

    dbStartTime = getColumn(crabDBInfo, 'tm_start_time')
    statusDict['submissionTime'] = getEpochFromDBTime(datetime.strptime(dbStartTime, '%Y-%m-%d %H:%M:%S.%f'))

    statusDict['statusFailureMsg'] = statusFailureMsg
    statusDict['proxiedWebDir'] = proxiedWebDir
    statusDict['jobsPerStatus'] = shortResult.get('jobsPerStatus', {})
    statusDict['jobList'] = shortResult.get('jobList', {})
    statusDict['publication'] = pubStatus.get('status', {})
    statusDict['publicationFailures'] = pubStatus.get('failure_reasons', {})
    statusDict['jobs'] = statusCacheInfo
    return statusDict
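# Hedged example of the 'tm_start_time' parsing above (timestamp invented): the Task
# DB is assumed to store times like '2024-01-15 12:30:45.123456', which strptime with
# '%Y-%m-%d %H:%M:%S.%f' turns into a datetime before getEpochFromDBTime converts it
# to an epoch:
#   datetime.strptime('2024-01-15 12:30:45.123456', '%Y-%m-%d %H:%M:%S.%f')
#   ->  datetime.datetime(2024, 1, 15, 12, 30, 45, 123456)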