def _tryFailoverTransfer(self, tarFileName, tarFileDir):
    """Try to upload the log tarball to one of the failover SEs and build a moving request.

    :param str tarFileName: name of the log tarball (local file name and remote file name)
    :param str tarFileDir: local directory containing the tarball
    :returns: S_OK() with no value if the upload failed everywhere (logs are best-effort),
              otherwise S_OK({'Request': <request>, 'uploadedSE': <SE name>})
    """
    failoverTransfer = FailoverTransfer(self._getRequestContainer())
    ## determine the experiment-specific failover SEs and catalogs from the Operations config;
    ## self.failoverSEs / the hard-coded catalog list are used as defaults when no CS value exists
    self.failoverSEs = self.ops.getValue(
        "Production/%s/FailOverSE" % self.experiment, self.failoverSEs)
    catalogs = self.ops.getValue(
        'Production/%s/Catalogs' % self.experiment,
        ['FileCatalog', 'LcgFileCatalog'])
    # Shuffle so the load is spread over the failover SEs rather than always hitting the first
    random.shuffle(self.failoverSEs)
    self.log.info(
        "Attempting to store file %s to the following SE(s):\n%s" %
        (tarFileName, ', '.join(self.failoverSEs)))
    result = failoverTransfer.transferAndRegisterFile(
        tarFileName,
        '%s/%s' % (tarFileDir, tarFileName),
        self.logLFNPath,
        self.failoverSEs,
        fileMetaDict={"GUID": None},  # no GUID known for the tarball at this point
        fileCatalog=catalogs)
    if not result['OK']:
        self.log.error('Failed to upload logs to all destinations')
        self.setApplicationStatus('Failed To Upload Logs')
        # Deliberately not an error: losing the logs must not fail the job
        return S_OK(
        )  #because if the logs are lost, it's not the end of the world.

    # Now after all operations, return potentially modified request object
    return S_OK({
        'Request': failoverTransfer.request,
        'uploadedSE': result['Value']['uploadedSE']
    })
def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
            workflowStatus=None, stepStatus=None, wf_commons=None,
            step_commons=None, step_number=None, step_id=None, orderedSEs=None):
    """Main execution function of the user job finalization.

    Resolves the user output data list (expanding glob patterns), constructs
    the user LFNs, uploads each file with failover, optionally schedules a
    replication to a second SE, and stores the resulting request.

    :param orderedSEs: optional pre-ordered list of destination SEs; when None
                       the list is computed via :meth:`_getOrderedSEsList`
    :returns: S_OK / S_ERROR following the DIRAC return-value convention
    """
    try:
        super(UserJobFinalization, self).execute(self.version, production_id,
                                                 prod_job_id, wms_job_id,
                                                 workflowStatus, stepStatus,
                                                 wf_commons, step_commons,
                                                 step_number, step_id)

        self._resolveInputVariables()

        # Earlier modules may have populated the report objects
        self.request.RequestName = 'job_%d_request.xml' % self.jobID
        self.request.JobID = self.jobID
        self.request.SourceComponent = "Job_%d" % self.jobID

        if not self._checkWFAndStepStatus():
            return S_OK()

        if not self.userOutputData:
            self.log.info(
                "No user output data is specified for this job, nothing to do")
            return S_OK("No output data to upload")

        self.log.info("User specified output file list is: %s" %
                      (', '.join(self.userOutputData)))

        # Collect entries containing a wildcard: they are glob patterns, not file names
        # (raw string: '\*' in a normal string is a deprecated invalid escape)
        globList = [i for i in self.userOutputData if re.search(r'\*', i)]

        # Check whether list of userOutputData is a globbable pattern
        if globList:
            for i in globList:
                self.userOutputData.remove(i)
            globbedOutputList = list(set(getGlobbedFiles(globList)))
            if globbedOutputList:
                self.log.info(
                    'Found a pattern in the output data file list, \
extra files to upload are: %s' % (', '.join(globbedOutputList)))
                self.userOutputData += globbedOutputList
            else:
                self.log.info(
                    "No files were found on the local disk for the following patterns: %s"
                    % (', '.join(globList)))

        self.log.info("Final list of files to upload are: %s" %
                      (', '.join(self.userOutputData)))

        # Determine the final list of possible output files for the workflow
        # and all the parameters needed to upload them.
        outputList = []
        for i in self.userOutputData:
            # FIX: was ('.'.split(i)[-1]).upper(), which splits the literal
            # string '.' by i and always yields '.' as the data type.
            # The intent is the (uppercased) file extension of i.
            outputList.append({
                'outputDataType': (i.split('.')[-1]).upper(),
                'outputDataName': os.path.basename(i)
            })

        userOutputLFNs = []
        if self.userOutputData:
            self.log.info("Constructing user output LFN(s) for %s" %
                          (', '.join(self.userOutputData)))
            userOutputLFNs = constructUserLFNs(self.jobID,
                                               self._getCurrentOwner(),
                                               self.userOutputData,
                                               self.userOutputPath,
                                               self.userPrependString)

        self.log.verbose(
            "Calling getCandidateFiles( %s, %s, %s)" %
            (outputList, userOutputLFNs, self.outputDataFileMask))
        try:
            fileDict = self.getCandidateFiles(outputList, userOutputLFNs,
                                              self.outputDataFileMask)
        except os.error as e:
            # missing local file(s): report and finish without failing the job
            self.setApplicationStatus(e)
            return S_OK()

        try:
            fileMetadata = self.getFileMetadata(fileDict)
        except RuntimeError as e:
            self.setApplicationStatus(e)
            return S_OK()

        if not fileMetadata:
            self.log.info(
                "No output data files were determined to be uploaded for this workflow"
            )
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()

        if not orderedSEs:
            orderedSEs = self._getOrderedSEsList()

        self.log.info("Ordered list of output SEs is: %s" %
                      (', '.join(orderedSEs)))
        final = {}
        for fileName, metadata in fileMetadata.items():
            final[fileName] = metadata
            final[fileName]['resolvedSE'] = orderedSEs

        # At this point can exit and see exactly what the module will upload
        if not self._enableModule():
            self.log.info(
                "Module disabled would have attempted to upload the files %s" %
                ', '.join(final.keys()))
            for fileName, metadata in final.items():
                self.log.info('--------%s--------' % fileName)
                for n, v in metadata.items():
                    self.log.info('%s = %s' % (n, v))
            return S_OK("Module is disabled by control flag")

        # Disable the watchdog check in case the file uploading takes a long time
        self._disableWatchdogCPUCheck()

        # Instantiate the failover transfer client with the global request object
        if not self.failoverTransfer:
            self.failoverTransfer = FailoverTransfer(self.request)

        # One by one upload the files with failover if necessary
        replication = {}
        failover = {}
        uploaded = []
        for fileName, metadata in final.items():
            self.log.info(
                "Attempting to store %s to the following SE(s): %s" %
                (fileName, ', '.join(metadata['resolvedSE'])))
            fileMetaDict = {
                'Size': metadata['filedict']['Size'],
                'LFN': metadata['filedict']['LFN'],
                'GUID': metadata['filedict']['GUID'],
                'Checksum': metadata['filedict']['Checksum'],
                'ChecksumType': metadata['filedict']['ChecksumType']
            }
            result = self.failoverTransfer.transferAndRegisterFile(
                fileName=fileName,
                localPath=metadata['localpath'],
                lfn=metadata['filedict']['LFN'],
                destinationSEList=metadata['resolvedSE'],
                fileMetaDict=fileMetaDict,
                masterCatalogOnly=True)
            if not result['OK']:
                self.log.error(
                    "Could not transfer and register %s with metadata:\n %s" %
                    (fileName, metadata))
                failover[fileName] = metadata
            else:
                # Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                replicateSE = ''
                uploadedSE = result['Value'].get('uploadedSE', '')
                if uploadedSE:
                    # first SE different from the one already holding the file
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break
                if replicateSE and lfn and self.replicateUserOutputData:
                    self.log.info("Will attempt to replicate %s to %s" %
                                  (lfn, replicateSE))
                    replication[lfn] = (uploadedSE, replicateSE, fileMetaDict)

        cleanUp = False
        for fileName, metadata in failover.items():
            random.shuffle(self.failoverSEs)
            targetSE = metadata['resolvedSE'][0]
            if len(metadata['resolvedSE']) > 1:
                replicateSE = metadata['resolvedSE'][1]
            else:
                replicateSE = ''
            metadata['resolvedSE'] = self.failoverSEs
            fileMetaDict = {
                'Size': metadata['filedict']['Size'],
                'LFN': metadata['filedict']['LFN'],
                'GUID': metadata['filedict']['GUID']
            }
            result = self.failoverTransfer.transferAndRegisterFileFailover(
                fileName,
                metadata['localpath'],
                metadata['lfn'],
                targetSE,
                metadata['resolvedSE'],
                fileMetaDict=fileMetaDict,
                masterCatalogOnly=True)
            if not result['OK']:
                self.log.error(
                    "Could not transfer and register %s with metadata:\n %s" %
                    (fileName, metadata))
                cleanUp = True
                continue  # for users can continue even if one completely fails
            else:
                lfn = metadata['lfn']
                uploaded.append(lfn)
                # Even when using Failover, one needs to replicate to a second SE
                if replicateSE and self.replicateUserOutputData:
                    replication[lfn] = (targetSE, replicateSE, fileMetaDict)

        # For files correctly uploaded must report LFNs to job parameters
        if uploaded:
            report = ', '.join(uploaded)
            self.setJobParameter('UploadedOutputData', report)

        # Now after all operations, retrieve potentially modified request object
        self.request = self.failoverTransfer.request

        # If some or all of the files failed to be saved to failover
        if cleanUp:
            self.workflow_commons['Request'] = self.request
            # Leave any uploaded files just in case it is useful for the user
            # do not try to replicate any files.
            return S_ERROR("Failed To Upload Output Data")

        for lfn, (uploadedSE, repSE, fileMetaDictItem) in replication.items():
            self.failoverTransfer._setFileReplicationRequest(
                lfn, repSE, fileMetaDictItem, uploadedSE)

        self.workflow_commons['Request'] = self.failoverTransfer.request

        self.generateFailoverFile()

        self.setApplicationStatus("Job Finished Successfully")
        return S_OK('Output data uploaded')

    except Exception as e:  # pylint:disable=broad-except
        self.log.exception("Failure in UserJobFinalization execute module",
                           lException=e)
        self.setApplicationStatus(repr(e))
        return S_ERROR(str(e))
    finally:
        super(UserJobFinalization, self).finalize(self.version)
def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
            workflowStatus=None, stepStatus=None, wf_commons=None,
            step_commons=None, step_number=None, step_id=None):
    """ Main executon method

    Selects the relevant log files, copies them into a temporary log
    directory, uploads that directory to the configured log SE, and on
    failure tars the logs and ships them to a failover SE via
    :meth:`_uploadLogToFailoverSE`.
    """
    try:
        super(UploadLogFile, self).execute(self.version, production_id,
                                           prod_job_id, wms_job_id,
                                           workflowStatus, stepStatus,
                                           wf_commons, step_commons,
                                           step_number, step_id)

        self._resolveInputVariables()

        self.request.RequestName = 'job_%d_request.xml' % self.jobID
        self.request.JobID = self.jobID
        self.request.SourceComponent = "Job_%d" % self.jobID

        # Dump the working directory content to the log for debugging
        res = systemCall(0, shlex.split('ls -al'))
        if res['OK'] and res['Value'][0] == 0:
            self.log.info('The contents of the working directory...')
            self.log.info(str(res['Value'][1]))
        else:
            self.log.error('Failed to list the log directory',
                           str(res['Value'][2]))

        self.log.info('PRODUCTION_ID = %s, JOB_ID = %s ' %
                      (self.production_id, self.prod_job_id))
        self.logdir = os.path.realpath('./job/log/%s/%s' %
                                       (self.production_id, self.prod_job_id))
        self.log.info('Selected log files will be temporarily stored in %s' %
                      self.logdir)

        ##########################################
        # First determine the files which should be saved
        self.log.info('Determining the files to be saved in the logs.')
        res = self._determineRelevantFiles()
        if not res['OK']:
            # best-effort: log upload failures never fail the job
            self.log.error('Completely failed to select relevant log files.',
                           res['Message'])
            return S_OK()
        selectedFiles = res['Value']
        self.log.info('The following %s files were selected to be saved:\n%s' %
                      (len(selectedFiles), '\n'.join(selectedFiles)))

        #########################################
        # Create a temporary directory containing these files
        self.log.info('Populating a temporary directory for selected files.')
        res = self.__populateLogDirectory(selectedFiles)
        if not res['OK']:
            self.log.error('Completely failed to populate temporary log file directory.',
                           res['Message'])
            self.setApplicationStatus('Failed To Populate Log Dir')
            return S_OK()
        self.log.info('%s populated with log files.' % self.logdir)

        #########################################
        # Make sure all the files in the log directory have the correct permissions
        result = self.__setLogFilePermissions(self.logdir)
        if not result['OK']:
            # non-fatal: permissions only affect later web access to the logs
            self.log.error('Could not set permissions of log files to 0755 with message:\n%s' %
                           (result['Message']))

        # Instantiate the failover transfer client with the global request object
        if not self.failoverTransfer:
            self.failoverTransfer = FailoverTransfer(self.request)

        #########################################
        if not self._enableModule():
            self.log.info("Would have attempted to upload log files, but there's not JobID")
            return S_OK()

        # Attempt to upload logs to the LogSE
        self.log.info('Transferring log files to the %s' % self.logSE)
        res = returnSingleResult(StorageElement(self.logSE).getURL(self.logFilePath,
                                                                   protocol='https'))
        if not res['OK']:
            # fall back to the static LHCb log-server URL scheme
            self.log.warn("Could not get dynamic URL for log", res)
            logHttpsURL = "http://lhcb-logs.cern.ch/storage%s" % self.logFilePath
        else:
            logHttpsURL = res['Value']
        logURL = '<a href="%s">Log file directory</a>' % logHttpsURL
        self.log.info('Logs for this job may be retrieved from %s' % logURL)

        self.log.info('putDirectory %s %s %s' % (self.logFilePath,
                                                 os.path.realpath(self.logdir),
                                                 self.logSE))
        res = returnSingleResult(StorageElement(self.logSE).putDirectory(
            {self.logFilePath: os.path.realpath(self.logdir)}))
        self.log.verbose(res)
        # The URL is published even if the upload failed, so the user gets a pointer
        self.setJobParameter('Log URL', logURL)
        if res['OK']:
            self.log.info('Successfully upload log directory to %s' % self.logSE)
        else:
            self.log.error("Failed to upload log files with message '%s', uploading to failover SE" %
                           res['Message'])
            # make a tar file
            tarFileName = os.path.basename(self.logLFNPath)
            try:
                res = tarFiles(tarFileName, selectedFiles, compression='gz')
                if not res['OK']:
                    self.log.error('Failed to create tar of log files: %s' % res['Message'])
                    self.setApplicationStatus('Failed to create tar of log files')
                    # We do not fail the job for this case
                    return S_OK()
            except IOError:
                # NOTE(review): if tarFiles raised, 'res' still holds the earlier
                # putDirectory result, so this message shows the wrong error — confirm
                self.log.error('Failed to create tar of log files: %s' % res['Message'])
                self.setApplicationStatus('Failed to create tar of log files')
                # We do not fail the job for this case
                return S_OK()
            self._uploadLogToFailoverSE(tarFileName)

        self.workflow_commons['Request'] = self.request

        return S_OK("Log Files uploaded")

    except Exception as e:  # pylint:disable=broad-except
        self.log.exception("Failure in UploadLogFile execute module",
                           lException=e)
        return S_ERROR(str(e))
    finally:
        super(UploadLogFile, self).finalize(self.version)
def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
            workflowStatus=None, stepStatus=None, wf_commons=None,
            step_commons=None, step_number=None, step_id=None,
            SEs=None, fileDescendants=None):
    """ Main execution function.

    1. Determine the final list of possible output files for the workflow
       and all the parameters needed to upload them.
    2. Verifying that the input files have no descendants (and exiting with error, otherwise)
    3. Sending the BK records for the steps of the job
    4. Transfer output files in their destination, register in the FC (with failover)
    5. Registering the output files in the Bookkeeping

    :param SEs: optional explicit destination SE list overriding the resolved ones
    :param fileDescendants: optional pre-computed list of input LFNs with
                            descendants (used to skip the BK descendants query)
    """
    try:
        super(UploadOutputData, self).execute(self.version, production_id,
                                              prod_job_id, wms_job_id,
                                              workflowStatus, stepStatus,
                                              wf_commons, step_commons,
                                              step_number, step_id)

        # This returns all Tier1-Failover unless a specific one is defined for the site
        self.failoverSEs = getDestinationSEList('Tier1-Failover',
                                                self.siteName,
                                                outputmode='Any')
        random.shuffle(self.failoverSEs)

        self._resolveInputVariables()

        if not self._checkWFAndStepStatus():
            return S_OK(
                "Failures detected in previous steps: no output data upload attempted"
            )

        # ## 1. Determine the final list of possible output files
        # ##    for the workflow and all the parameters needed to upload them.
        # ##
        self.log.verbose("Getting the list of candidate files")
        fileDict = self.getCandidateFiles(self.outputList,
                                          self.prodOutputLFNs,
                                          self.outputDataFileMask,
                                          self.outputDataStep)

        fileMetadata = self.getFileMetadata(fileDict)

        if not fileMetadata:
            self.log.info(
                "No output data files were determined to be uploaded for this workflow"
            )
            return S_OK()

        # Get final, resolved SE list for files
        final = {}
        for fileName, metadata in fileMetadata.iteritems():
            if not SEs:
                resolvedSE = getDestinationSEList(
                    metadata['workflowSE'], self.siteName, self.outputMode,
                    self.workflow_commons.get('runNumber'))
            else:
                resolvedSE = SEs
            final[fileName] = metadata
            final[fileName]['resolvedSE'] = resolvedSE

        self.log.info("The following files will be uploaded",
                      ": %s" % (', '.join(final.keys())))
        for fileName, metadata in final.items():
            self.log.info('--------%s--------' % fileName)
            for name, val in metadata.iteritems():
                self.log.info('%s = %s' % (name, val))

        if not self._enableModule():
            # At this point can exit and see exactly what the module would have uploaded
            self.log.info(
                "Module disabled",
                "would have attempted to upload the files %s" %
                ', '.join(final.keys()))

        # ## 2. Prior to uploading any files must check (for productions with input data)
        # ##    that no descendant files already exist with replica flag in the BK.
        # ##
        if self.inputDataList:
            if fileDescendants is not None:
                lfnsWithDescendants = fileDescendants
            else:
                if not self._enableModule():
                    self.log.info(
                        "Module disabled",
                        "would have attempted to check the files %s" %
                        ', '.join(self.inputDataList))
                    lfnsWithDescendants = []
                else:
                    lfnsWithDescendants = getFileDescendants(
                        self.production_id, self.inputDataList,
                        dm=self.dataManager, bkClient=self.bkClient)
            if not lfnsWithDescendants:
                self.log.info("No descendants found, outputs can be uploaded")
            else:
                # Descendants mean the inputs were already processed: abort the upload
                self.log.error("Found descendants!!! Outputs won't be uploaded")
                # NOTE(review): the expression below concatenates ": %s" and ' % '
                # and uses the result as a join separator — looks like a garbled
                # ": %s" % ', '.join(...); confirm against upstream before changing
                self.log.info("Files with descendants",
                              ": %s" ' % '.join(lfnsWithDescendants))
                self.log.info(
                    "The files above will be set as 'Processed', other lfns in input will be later reset as Unused"
                )
                self.fileReport.setFileStatus(int(self.production_id),
                                              lfnsWithDescendants,
                                              'Processed')
                return S_ERROR("Input Data Already Processed")

        # ## 3. Sending the BK records for the steps of the job
        # ##
        bkFileExtensions = ['bookkeeping*.xml']
        bkFiles = []
        for ext in bkFileExtensions:
            self.log.debug("Looking at BK record wildcard: %s" % ext)
            globList = glob.glob(ext)
            for check in globList:
                if os.path.isfile(check):
                    self.log.verbose("Found locally existing BK file record",
                                     ": %s" % check)
                    bkFiles.append(check)

        # Unfortunately we depend on the file names to order the BK records
        bkFilesListTuples = []
        for bk in bkFiles:
            # sort key: the trailing numeric index embedded in the file name
            bkFilesListTuples.append(
                (bk, int(bk.split('_')[-1].split('.')[0])))
        bkFiles = [
            bk[0] for bk in sorted(bkFilesListTuples, key=itemgetter(1))
        ]

        self.log.info("The following BK records will be sent",
                      ": %s" % (', '.join(bkFiles)))

        if self._enableModule():
            for bkFile in bkFiles:
                with open(bkFile, 'r') as fd:
                    bkXML = fd.read()
                self.log.info("Sending BK record", ":\n%s" % (bkXML))
                result = self.bkClient.sendXMLBookkeepingReport(bkXML)
                self.log.verbose(result)
                if result['OK']:
                    self.log.info("Bookkeeping report sent", "for %s" % bkFile)
                else:
                    # Failed to send now: queue a ForwardDISET operation so the
                    # record is re-sent asynchronously via the request system
                    self.log.error(
                        "Could not send Bookkeeping XML file to server",
                        ": %s" % result['Message'])
                    self.log.info("Preparing DISET request", "for %s" % bkFile)
                    bkDISETReq = Operation()
                    bkDISETReq.Type = 'ForwardDISET'
                    bkDISETReq.Arguments = DEncode.encode(result['rpcStub'])
                    self.request.addOperation(bkDISETReq)
                    self.workflow_commons[
                        'Request'] = self.request  # update each time, just in case
        else:
            self.log.info(
                "Would have attempted to send bk records, but module is disabled"
            )

        # ## 4. Transfer output files in their destination, register in the FC (with failover)
        # ##

        # Disable the watchdog check in case the file uploading takes a long time
        self._disableWatchdogCPUCheck()

        # Instantiate the failover transfer client with the global request object
        if not self.failoverTransfer:
            self.failoverTransfer = FailoverTransfer(self.request)

        # Track which files are successfully uploaded (not to failover) via
        performBKRegistration = []
        # Failover replicas are always added to the BK when they become available
        # (actually, added to all the catalogs)
        failover = {}
        for fileName, metadata in final.items():
            targetSE = metadata['resolvedSE']
            self.log.info(
                "Attempting to store file to SE",
                "%s to the following SE(s):\n%s" %
                (fileName, ', '.join(targetSE)))
            fileMetaDict = {
                'Size': metadata['filedict']['Size'],
                'LFN': metadata['filedict']['LFN'],
                'GUID': metadata['filedict']['GUID'],
                'Checksum': metadata['filedict']['Checksum'],
                'ChecksumType': metadata['filedict']['ChecksumType']
            }
            if not self._enableModule():
                # At this point can exit and see exactly what the module would have uploaded
                self.log.info(
                    "Module disabled",
                    "would have attempted to upload file %s" % fileName)
                continue
            result = self.failoverTransfer.transferAndRegisterFile(
                fileName=fileName,
                localPath=metadata['localpath'],
                lfn=metadata['filedict']['LFN'],
                destinationSEList=targetSE,
                fileMetaDict=fileMetaDict,
                masterCatalogOnly=True)
            if not result['OK']:
                self.log.error(
                    "Could not transfer and register",
                    " %s with metadata:\n %s" % (fileName, metadata))
                failover[fileName] = metadata
            else:
                self.log.info(
                    "File uploaded, will be registered in BK if all files uploaded for job",
                    "(%s)" % fileName)
                # if the files are uploaded in the SE, independently if the registration
                # in the FC is done, then we have to register all of them in the BKK
                performBKRegistration.append(metadata)

        cleanUp = False
        for fileName, metadata in failover.items():
            self.log.info(
                "Setting default catalog for failover transfer registration to master catalog"
            )
            random.shuffle(self.failoverSEs)
            targetSE = metadata['resolvedSE'][0]
            metadata['resolvedSE'] = self.failoverSEs
            fileMetaDict = {
                'Size': metadata['filedict']['Size'],
                'LFN': metadata['filedict']['LFN'],
                'GUID': metadata['filedict']['GUID'],
                'Checksum': metadata['filedict']['Checksum'],
                'ChecksumType': metadata['filedict']['ChecksumType']
            }
            if not self._enableModule():
                # At this point can exit and see exactly what the module would have uploaded
                self.log.info(
                    "Module disabled",
                    "would have attempted to upload with failover file %s" %
                    fileName)
                continue
            result = self.failoverTransfer.transferAndRegisterFileFailover(
                fileName=fileName,
                localPath=metadata['localpath'],
                lfn=metadata['filedict']['LFN'],
                targetSE=targetSE,
                failoverSEList=metadata['resolvedSE'],
                fileMetaDict=fileMetaDict,
                masterCatalogOnly=True)
            if not result['OK']:
                self.log.error(
                    "Could not transfer and register",
                    "%s in failover with metadata:\n %s" %
                    (fileName, metadata))
                cleanUp = True
                break  # no point continuing if one completely fails

        # Now after all operations, retrieve potentially modified request object
        self.request = self.failoverTransfer.request

        # If some or all of the files failed to be saved even to failover
        if cleanUp and self._enableModule():
            self._cleanUp(final)
            self.workflow_commons['Request'] = self.request
            return S_ERROR('Failed to upload output data')

        # For files correctly uploaded must report LFNs to job parameters
        if final and self._enableModule():
            report = ', '.join(final.keys())
            self.setJobParameter('UploadedOutputData', report)

        # ## 5. Can now register the successfully uploaded files in the BK
        # ##    i.e. set the BK replica flags
        # ##
        if not performBKRegistration:
            self.log.info(
                "There are no files to perform the BK registration for, all are in failover"
            )
        elif self._enableModule():  # performing BK registration

            # Getting what should be registered immediately, and what later
            lfnsToRegisterInBK = set([
                metadata['filedict']['LFN']
                for metadata in performBKRegistration
            ])
            lfnsToRegisterInBKNow = self._getLFNsForBKRegistration(
                lfnsToRegisterInBK)
            lfnsToRegisterInBKLater = list(lfnsToRegisterInBK -
                                           set(lfnsToRegisterInBKNow))

            # Registering what should be registering immediately, and handling failures
            result = FileCatalog(
                catalogs=['BookkeepingDB']).addFile(lfnsToRegisterInBKNow)
            self.log.verbose("BookkeepingDB.addFile: %s" % result)
            if not result['OK']:
                self.log.error(result)
                return S_ERROR("Could Not Perform BK Registration")
            if 'Failed' in result['Value'] and result['Value']['Failed']:
                for lfn, error in result['Value']['Failed'].iteritems():
                    lfnMetadata = {}
                    for lfnMD in performBKRegistration:
                        if lfnMD[
                                'lfn'] == lfn:  # the lfn is indeed both at lfnMD['lfn'] and at lfnMD['filedict']['LFN']
                            lfnMetadata = lfnMD['filedict']
                            break
                    self.setBKRegistrationRequest(lfn,
                                                  error=error,
                                                  metaData=lfnMetadata)

            # Adding a registration request for what whould be registered later
            if lfnsToRegisterInBKLater:
                for lfnMD in performBKRegistration:
                    if lfnMD['lfn'] in lfnsToRegisterInBKLater:
                        lfnMetadata = lfnMD['filedict']
                        self.setBKRegistrationRequest(lfnMD['lfn'],
                                                      metaData=lfnMetadata)

        self.workflow_commons['Request'] = self.request

        return S_OK("Output data uploaded")

    except Exception as e:  # pylint:disable=broad-except
        self.log.exception('Exception in UploadOutputData', lException=e)
        self.setApplicationStatus(repr(e))
        return S_ERROR(str(e))
    finally:
        super(UploadOutputData, self).finalize(self.version)
def execute(self):
    """Main execution function.

    Determines the experiment from the first output LFN, resolves the
    candidate output files and their destination SEs, then uploads each file
    (with failover SEs used for the ones that failed, or for all files when
    ``self.failoverTest`` is set) and stores the resulting request.

    :returns: S_OK / S_ERROR following the DIRAC return-value convention
    """
    self.log.info('Initializing %s' % self.version)
    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error("Failed to resolve input parameters:", result['Message'])
        return result

    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' %
                         (self.workflowStatus['OK'], self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    ## determine the experiment from the LFN namespace of the first output file
    example_file = self.prodOutputLFNs[0]
    if "/ilc/prod/clic" in example_file:
        self.experiment = "CLIC"
    elif "/ilc/prod/ilc/sid" in example_file:
        self.experiment = 'ILC_SID'
    elif "/ilc/prod/ilc/mc-dbd" in example_file:
        self.experiment = 'ILC_ILD'
    else:
        self.log.warn("Failed to determine experiment, reverting to default")

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    result = self.getCandidateFiles(self.outputList, self.prodOutputLFNs,
                                    self.outputDataFileMask)
    if not result['OK']:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return result

    fileDict = result['Value']
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return result

    if not result['Value']:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        return S_OK()

    fileMetadata = result['Value']

    # Get final, resolved SE list for files
    final = {}
    for fileName, metadata in fileMetadata.items():
        result = getDestinationSEList(metadata['workflowSE'], DIRAC.siteName(),
                                      self.outputMode)
        if not result['OK']:
            self.log.error('Could not resolve output data SE', result['Message'])
            self.setApplicationStatus('Failed To Resolve OutputSE')
            return result
        resolvedSE = result['Value']
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = resolvedSE

    self.log.info('The following files will be uploaded: %s' % (', '.join(final.keys())))
    for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for metaName, metaValue in metadata.items():
            self.log.info('%s = %s' % (metaName, metaValue))

    # At this point can exit and see exactly what the module would have uploaded
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted to upload the \
following files %s' % ', '.join(final.keys()))
        return S_OK('Module is disabled by control flag')

    # Disable the watchdog check in case the file uploading takes a long time
    self.log.info('Creating DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK in order to disable the Watchdog prior to upload')
    # FIX: use a context manager so the handle is closed even on write failure
    with open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') as fopen:
        fopen.write('%s' % time.asctime())

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    catalogs = self.ops.getValue('Production/%s/Catalogs' % self.experiment,
                                 ['FileCatalog', 'LcgFileCatalog'])

    # One by one upload the files with failover if necessary
    failover = {}
    if not self.failoverTest:
        for fileName, metadata in final.iteritems():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" %
                          (fileName, ', '.join(metadata['resolvedSE'])))
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'],
                                                              metadata['lfn'],
                                                              metadata['resolvedSE'],
                                                              fileMetaDict = metadata['filedict'],
                                                              fileCatalog = catalogs)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' %
                               (fileName, metadata['filedict']))
                failover[fileName] = metadata
            else:
                #lfn = metadata['lfn']
                pass
    else:
        failover = final

    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment,
                                         self.failoverSEs)

    cleanUp = False
    for fileName, metadata in failover.iteritems():
        self.log.info('Setting default catalog for failover transfer to FileCatalog')
        # FIX: take a copy — the original aliased self.failoverSEs, so the
        # remove() below permanently dropped the target SE from the instance
        # attribute for all subsequent files
        failovers = list(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        try: # remove duplicate site, otherwise it will do nasty things where processing the request
            failovers.remove(targetSE)
        except ValueError:
            pass
        random.shuffle(failovers)
        metadata['resolvedSE'] = failovers
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                  metadata['lfn'], targetSE,
                                                                  metadata['resolvedSE'],
                                                                  fileMetaDict = metadata['filedict'],
                                                                  fileCatalog = catalogs)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' %
                           (fileName, metadata['filedict']))
            cleanUp = True
            break # no point continuing if one completely fails

    os.remove("DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK") # cleanup the mess

    self.workflow_commons['Request'] = failoverTransfer.request

    # If some or all of the files failed to be saved to failover
    if cleanUp:
        lfns = []
        for fileName, metadata in final.items():
            lfns.append(metadata['lfn'])
        result = self._cleanUp(lfns)
        return S_ERROR('Failed to upload output data')

    return S_OK('Output data uploaded')
def execute(self):
    """ Main execution function.

    Runs only at the last workflow step: resolves the user output files and
    LFNs, orders the candidate destination SEs, uploads with failover, then
    triggers replication of the successfully uploaded files.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
        return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
        self.log.error("Failed to resolve input parameters:", resultIV['Message'])
        return resultIV

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' %
                         (self.workflowStatus['OK'], self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    userOutputLFNs = []
    if self.userOutputData:
        resultOLfn = self.constructOutputLFNs()
        if not resultOLfn['OK']:
            self.log.error('Could not create user LFNs', resultOLfn['Message'])
            return resultOLfn
        userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' %
                     (outputList, userOutputLFNs, self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
        # candidate-file errors are fatal only when app errors are not ignored
        if not self.ignoreapperrors:
            self.log.error(resultCF['Message'])
            self.setApplicationStatus(resultCF['Message'])
            return S_OK()
    fileDict = resultCF['Value']

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
        if not self.ignoreapperrors:
            self.log.error(resultFMD['Message'])
            self.setApplicationStatus(resultFMD['Message'])
            return S_OK()

    if not resultFMD['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = resultFMD['Value']

    #First get the local (or assigned) SE to try first for upload and others in random fashion
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
        self.log.error('Could not resolve output data SE', resultSEL['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return resultSEL
    localSE = resultSEL['Value']

    # ordering: local SE first, then the randomized remaining defaults,
    # with any user-requested SEs prepended below
    orderedSEs = [ se for se in self.defaultOutputSE if se not in localSE and
                   se not in self.userOutputSE]

    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
        prependSEs = []
        for userSE in self.userOutputSE:
            if userSE not in orderedSEs:
                prependSEs.append(userSE)
        orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.iteritems():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
        return S_OK('Module is disabled by control flag')

    self.injectJobIndex( final )

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
    if not self.failoverTest:
        self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover,
                                      filesUploaded, filesToReplicate)
    else:
        # test mode: skip the primary upload and send everything to failover
        filesToFailover = final

    ##if there are files to be failovered, we do it now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer,
                                                       filesToFailover,
                                                       filesUploaded)
    cleanUp = resultTRFF['Value']['cleanUp']

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
        report = ', '.join( filesUploaded )
        self.jobReport.setJobParameter( 'UploadedOutputData', report )

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager( catalogs = self.userFileCatalog )
    # grace period so the catalog entries are visible before replication starts
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
        resultRAR = datMan.replicateAndRegister(lfn, repSE)
        if not resultRAR['OK']:
            # non-fatal: the file already has at least one replica
            self.log.info('Replication failed with below error but file already exists in Grid storage with \
at least one replica:\n%s' % (resultRAR))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
def execute(self):
    """Upload the user-specified output data at the end of the job.

    Runs only at the last workflow step (user jobs may have any number of
    steps; finalization must happen once). Resolves the output file list,
    constructs user LFNs, uploads each file to the user-chosen SE(s) with
    failover to `self.failoverSEs`, optionally triggers replication to a
    second SE, and records everything in the failover request object.

    :returns: S_OK with a status string on success or benign no-op,
              S_ERROR if output data could not be saved even to failover.
    """
    # Have to work out if the module is part of the last step i.e.
    # user jobs can have any number of steps and we only want
    # to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
      self.lastStep = True
    else:
      self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself only \
at the last workflow step.' % (currentStep, totalSteps))

    if not self.lastStep:
      # Not last step, do nothing, proceed happily.
      return S_OK()

    result = self.resolveInputVariables()
    if not result['OK']:
      self.log.error("Failed to resolve input parameters:", result['Message'])
      return result

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      # Something went wrong in the step or the workflow, do nothing.
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    # Label the global request object with this job's identity so operations
    # created below can be traced back to the job.
    self.request.RequestName = 'job_%d_request.xml' % int(self.jobID)
    self.request.JobID = self.jobID
    self.request.SourceComponent = "Job_%d" % int(self.jobID)

    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    outputList = []
    possible_files = []
    for i in self.userOutputData:
      # Each entry may be a glob pattern; expand it to the files present locally.
      files = getGlobbedFiles(i)
      for possible_file in files:
        if possible_file in possible_files:
          # Don't have twice the same file
          continue
        outputList.append({'outputDataType': i.split('.')[-1].upper(),  # this would be used to sort the files in different dirs
                           'outputDataSE': self.userOutputSE,
                           'outputFile': os.path.basename(possible_file)})
        possible_files.append(os.path.basename(possible_file))

    self.log.info('Constructing user output LFN(s) for %s' % (', '.join(self.userOutputData)))
    if not self.jobID:
      # No WMS job ID (e.g. local test run): use a dummy ID so LFN construction works.
      self.jobID = 12345
    # Owner and VO are needed to build the user LFN namespace; take them from the
    # workflow parameters when present, otherwise fall back to the proxy.
    owner = ''
    if 'Owner' in self.workflow_commons:
      owner = self.workflow_commons['Owner']
    else:
      res = getCurrentOwner()
      if not res['OK']:
        self.log.error('Could not find proxy')
        return S_ERROR('Could not obtain owner from proxy')
      owner = res['Value']
    vo = ''
    if 'VO' in self.workflow_commons:
      vo = self.workflow_commons['VO']
    else:
      res = getVOfromProxyGroup()
      if not res['OK']:
        self.log.error('Failed finding the VO')
        return S_ERROR('Could not obtain VO from proxy')
      vo = res['Value']
    result = constructUserLFNs(int(self.jobID), vo, owner, possible_files, self.userOutputPath)
    if not result['OK']:
      self.log.error('Could not create user LFNs', result['Message'])
      return result
    userOutputLFNs = result['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s)' % (outputList, userOutputLFNs))
    result = self.getCandidateFiles(outputList, userOutputLFNs)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return S_OK()
    # NOTE(review): if getCandidateFiles failed and ignoreapperrors is set, the
    # next line reads 'Value' from an error result and would raise KeyError —
    # confirm intended behavior before restructuring.
    fileDict = result['Value']
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return S_OK()

    if not result['Value']:
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()
    # NOTE(review): same ignoreapperrors fall-through caveat applies here.
    fileMetadata = result['Value']

    # The user-specified SE(s) are tried in the given order; no local-SE
    # re-ordering is done in this code path.
    orderedSEs = self.userOutputSE

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.items():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    # At this point can exit and see exactly what the module will upload
    if not self.enable:
      self.log.info('Module is disabled by control flag, would have attempted \
to upload the following files %s' % ', '.join(final.keys()))
      for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for n, v in metadata.items():
          self.log.info('%s = %s' % (n, v))
      return S_OK('Module is disabled by control flag')

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    # One by one upload the files with failover if necessary
    replication = {}   # lfn -> SE to replicate to after a successful upload
    failover = {}      # fileName -> metadata for files needing failover upload
    uploaded = []      # LFNs successfully stored somewhere
    if not self.failoverTest:
      for fileName, metadata in final.items():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                                                                                   ', '.join(metadata['resolvedSE'])))
        replicateSE = ''
        result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                          metadata['resolvedSE'], fileMetaDict=metadata,
                                                          fileCatalog=self.userFileCatalog)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
          failover[fileName] = metadata
        else:
          # Only attempt replication after successful upload
          lfn = metadata['lfn']
          uploaded.append(lfn)
          seList = metadata['resolvedSE']
          if 'uploadedSE' in result['Value']:
            uploadedSE = result['Value']['uploadedSE']
            # Pick the first requested SE that is not the one already holding
            # the file as the replication target.
            for se in seList:
              if not se == uploadedSE:
                replicateSE = se
                break
          if replicateSE and lfn:
            self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
            replication[lfn] = replicateSE
    else:
      # Failover-test mode: skip direct uploads and push everything to failover.
      failover = final

    cleanUp = False
    for fileName, metadata in failover.items():
      # Randomize failover SEs so load is spread across them.
      random.shuffle(self.failoverSEs)
      targetSE = metadata['resolvedSE'][0]
      metadata['resolvedSE'] = self.failoverSEs
      result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'], metadata['lfn'],
                                                                targetSE, self.failoverSEs, fileMetaDict=metadata,
                                                                fileCatalog=self.userFileCatalog)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
        cleanUp = True
        continue  # for users can continue even if one completely fails
      else:
        lfn = metadata['lfn']
        uploaded.append(lfn)

    # For files correctly uploaded must report LFNs to job parameters
    if uploaded:
      report = ', '.join(uploaded)
      self.jobReport.setJobParameter('UploadedOutputData', report)

    # Keep any operations the failover client accumulated (registrations,
    # moving requests, ...) in the global request object.
    self.request = failoverTransfer.request

    # If some or all of the files failed to be saved to failover
    if cleanUp:
      self.workflow_commons['Request'] = self.request
      # Leave any uploaded files just in case it is useful for the user
      # do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')

    # If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
      result = rm.replicateAndRegister(lfn, repSE, catalog=self.userFileCatalog)
      if not result['OK']:
        # Best-effort: the file already has at least one replica, so a failed
        # replication is logged but does not fail the job.
        self.log.info('Replication failed with below error but file already exists in Grid storage with \
at least one replica:\n%s' % (result))

    self.workflow_commons['Request'] = self.request
    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')