class UserJobFinalization(ModuleBase):
    """Finalization of user jobs.

    Resolves the user-requested output files (including glob patterns),
    constructs their LFNs, uploads them to an ordered list of storage
    elements and, on failure, to failover SEs via deferred request
    operations. Optionally schedules a second-copy replication.
    """

    #############################################################################

    def __init__(self, bkClient=None, dm=None):
        """Module initialization.

        :param bkClient: optional bookkeeping client forwarded to ModuleBase
        :param dm: optional data manager forwarded to ModuleBase
        """
        self.log = gLogger.getSubLogger("UserJobFinalization")
        super(UserJobFinalization, self).__init__(self.log, bkClientIn=bkClient, dm=dm)
        self.version = __RCSID__
        self.enable = True
        # Default destination and failover SE groups come from the Configuration Service
        self.defaultOutputSE = resolveSEGroup(
            gConfig.getValue('/Resources/StorageElementGroups/Tier1-USER', []))
        self.failoverSEs = resolveSEGroup(
            gConfig.getValue('/Resources/StorageElementGroups/Tier1-Failover', []))
        # List all parameters here
        self.request = None
        # Always allow any files specified by users
        self.outputDataFileMask = ''
        self.userOutputData = []
        self.userOutputSE = ''
        self.userOutputPath = ''
        self.failoverTransfer = None
        self.replicateUserOutputData = False
        self.userPrependString = ''

    #############################################################################

    def _resolveInputVariables(self):
        """By convention the module parameters are resolved here."""
        super(UserJobFinalization, self)._resolveInputVariables()

        # Use LHCb utility for local running via dirac-jobexec
        if 'UserOutputData' in self.workflow_commons:
            userOutputData = self.workflow_commons['UserOutputData']
            if not isinstance(userOutputData, list):
                userOutputData = [i.strip() for i in userOutputData.split(';')]
            self.userOutputData = userOutputData

        if 'UserOutputSE' in self.workflow_commons:
            specifiedSE = self.workflow_commons['UserOutputSE']
            if not isinstance(specifiedSE, list):
                self.userOutputSE = [i.strip() for i in specifiedSE.split(';')]
            else:
                # FIX: a value already given as a list used to be silently
                # discarded (the attribute stayed at its '' default); keep it.
                self.userOutputSE = specifiedSE
        else:
            self.log.verbose('No UserOutputSE specified, using default value: %s' % (
                ', '.join(self.defaultOutputSE)))
            self.userOutputSE = []

        if 'UserOutputPath' in self.workflow_commons:
            self.userOutputPath = self.workflow_commons['UserOutputPath']

        if 'ReplicateUserOutputData' in self.workflow_commons and \
                self.workflow_commons['ReplicateUserOutputData']:
            self.replicateUserOutputData = True

        if 'UserOutputLFNPrepend' in self.workflow_commons:
            self.userPrependString = self.workflow_commons['UserOutputLFNPrepend']

    #############################################################################

    def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
                workflowStatus=None, stepStatus=None, wf_commons=None,
                step_commons=None, step_number=None, step_id=None, orderedSEs=None):
        """Main execution function.

        Uploads the user output data files (after glob expansion) to the
        ordered SE list, using failover transfers and deferred requests on
        failure.

        :param orderedSEs: optional pre-computed ordered list of target SEs;
                           when None it is derived via _getOrderedSEsList()
        :returns: S_OK/S_ERROR structure
        """
        try:
            super(UserJobFinalization, self).execute(self.version, production_id, prod_job_id,
                                                     wms_job_id, workflowStatus, stepStatus,
                                                     wf_commons, step_commons, step_number, step_id)

            self._resolveInputVariables()

            # Earlier modules may have populated the report objects
            self.request.RequestName = 'job_%d_request.xml' % self.jobID
            self.request.JobID = self.jobID
            self.request.SourceComponent = "Job_%d" % self.jobID

            if not self._checkWFAndStepStatus():
                return S_OK()

            if not self.userOutputData:
                self.log.info("No user output data is specified for this job, nothing to do")
                return S_OK("No output data to upload")

            self.log.info("User specified output file list is: %s" % (', '.join(self.userOutputData)))

            # Separate glob patterns ('*') from plain file names
            globList = []
            for i in self.userOutputData:
                if re.search(r'\*', i):  # FIX: raw string, '\*' is an invalid escape sequence
                    globList.append(i)

            # Check whether list of userOutputData is a globbable pattern
            if globList:
                for i in globList:
                    self.userOutputData.remove(i)
                globbedOutputList = list(set(getGlobbedFiles(globList)))
                if globbedOutputList:
                    self.log.info('Found a pattern in the output data file list, \
                    extra files to upload are: %s' % (', '.join(globbedOutputList)))
                    self.userOutputData += globbedOutputList
                else:
                    self.log.info("No files were found on the local disk for the following patterns: %s" %
                                  (', '.join(globList)))

            self.log.info("Final list of files to upload are: %s" % (', '.join(self.userOutputData)))

            # Determine the final list of possible output files for the workflow
            # and all the parameters needed to upload them.
            outputList = []
            for i in self.userOutputData:
                # FIX: was ('.'.split(i)[-1]).upper(), which splits the literal
                # '.' by the filename and never yields the real extension.
                outputList.append({
                    'outputDataType': i.split('.')[-1].upper(),
                    'outputDataName': os.path.basename(i)
                })

            userOutputLFNs = []
            if self.userOutputData:
                self.log.info("Constructing user output LFN(s) for %s" % (', '.join(self.userOutputData)))
                userOutputLFNs = constructUserLFNs(self.jobID, self._getCurrentOwner(),
                                                   self.userOutputData, self.userOutputPath,
                                                   self.userPrependString)

            self.log.verbose("Calling getCandidateFiles( %s, %s, %s)" %
                             (outputList, userOutputLFNs, self.outputDataFileMask))
            try:
                fileDict = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
            except os.error as e:
                self.setApplicationStatus(e)
                return S_OK()

            try:
                fileMetadata = self.getFileMetadata(fileDict)
            except RuntimeError as e:
                self.setApplicationStatus(e)
                return S_OK()

            if not fileMetadata:
                self.log.info("No output data files were determined to be uploaded for this workflow")
                self.setApplicationStatus('No Output Data Files To Upload')
                return S_OK()

            if not orderedSEs:
                orderedSEs = self._getOrderedSEsList()
            self.log.info("Ordered list of output SEs is: %s" % (', '.join(orderedSEs)))

            final = {}
            for fileName, metadata in fileMetadata.items():
                final[fileName] = metadata
                final[fileName]['resolvedSE'] = orderedSEs

            # At this point can exit and see exactly what the module will upload
            if not self._enableModule():
                self.log.info("Module disabled would have attempted to upload the files %s" %
                              ', '.join(final.keys()))
                for fileName, metadata in final.items():
                    self.log.info('--------%s--------' % fileName)
                    for n, v in metadata.items():
                        self.log.info('%s = %s' % (n, v))
                return S_OK("Module is disabled by control flag")

            # Disable the watchdog check in case the file uploading takes a long time
            self._disableWatchdogCPUCheck()

            # Instantiate the failover transfer client with the global request object
            if not self.failoverTransfer:
                self.failoverTransfer = FailoverTransfer(self.request)

            # One by one upload the files with failover if necessary
            replication = {}
            failover = {}
            uploaded = []
            for fileName, metadata in final.items():
                self.log.info("Attempting to store %s to the following SE(s): %s" %
                              (fileName, ', '.join(metadata['resolvedSE'])))
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID'],
                    'Checksum': metadata['filedict']['Checksum'],
                    'ChecksumType': metadata['filedict']['ChecksumType']
                }
                result = self.failoverTransfer.transferAndRegisterFile(
                    fileName=fileName,
                    localPath=metadata['localpath'],
                    lfn=metadata['filedict']['LFN'],
                    destinationSEList=metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error("Could not transfer and register %s with metadata:\n %s" %
                                   (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    # Only attempt replication after successful upload
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    seList = metadata['resolvedSE']
                    replicateSE = ''
                    uploadedSE = result['Value'].get('uploadedSE', '')
                    if uploadedSE:
                        # Pick the first SE that is not the one already used
                        for se in seList:
                            if not se == uploadedSE:
                                replicateSE = se
                                break
                    if replicateSE and lfn and self.replicateUserOutputData:
                        self.log.info("Will attempt to replicate %s to %s" % (lfn, replicateSE))
                        replication[lfn] = (uploadedSE, replicateSE, fileMetaDict)

            cleanUp = False
            for fileName, metadata in failover.items():
                random.shuffle(self.failoverSEs)
                targetSE = metadata['resolvedSE'][0]
                if len(metadata['resolvedSE']) > 1:
                    replicateSE = metadata['resolvedSE'][1]
                else:
                    replicateSE = ''
                metadata['resolvedSE'] = self.failoverSEs
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID']
                }
                result = self.failoverTransfer.transferAndRegisterFileFailover(
                    fileName, metadata['localpath'], metadata['lfn'], targetSE,
                    metadata['resolvedSE'], fileMetaDict=fileMetaDict, masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error("Could not transfer and register %s with metadata:\n %s" %
                                   (fileName, metadata))
                    cleanUp = True
                    continue  # for users can continue even if one completely fails
                else:
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    # Even when using Failover, one needs to replicate to a second SE
                    if replicateSE and self.replicateUserOutputData:
                        replication[lfn] = (targetSE, replicateSE, fileMetaDict)

            # For files correctly uploaded must report LFNs to job parameters
            if uploaded:
                report = ', '.join(uploaded)
                self.setJobParameter('UploadedOutputData', report)

            # Now after all operations, retrieve potentially modified request object
            self.request = self.failoverTransfer.request

            # If some or all of the files failed to be saved to failover
            if cleanUp:
                self.workflow_commons['Request'] = self.request
                # Leave any uploaded files just in case it is useful for the user
                # do not try to replicate any files.
                return S_ERROR("Failed To Upload Output Data")

            for lfn, (uploadedSE, repSE, fileMetaDictItem) in replication.items():
                self.failoverTransfer._setFileReplicationRequest(lfn, repSE, fileMetaDictItem, uploadedSE)

            self.workflow_commons['Request'] = self.failoverTransfer.request

            self.generateFailoverFile()

            self.setApplicationStatus("Job Finished Successfully")
            return S_OK('Output data uploaded')

        except Exception as e:  # pylint:disable=broad-except
            self.log.exception("Failure in UserJobFinalization execute module", lException=e)
            self.setApplicationStatus(repr(e))
            return S_ERROR(str(e))
        finally:
            super(UserJobFinalization, self).finalize(self.version)

    #############################################################################

    def _getOrderedSEsList(self):
        """Returns list of ordered SEs to which trying to upload.

        Order: a local SE first (when no explicit user SEs dominate), then the
        user-requested SEs, then the remaining default SEs in random order.
        """
        # FIXME: remove all banned SEs (not the force ones)
        # First get the local (or assigned) SE to try first for upload and others in random fashion
        localSEs = set(getDestinationSEList('Tier1-USER', self.siteName, outputmode='local'))
        self.log.verbose("Site Local SE for user outputs is: %s" % (list(localSEs)))
        userSEs = set(self.userOutputSE)
        otherSEs = set(self.defaultOutputSE) - localSEs - userSEs
        # If a user SE is local set it first
        topSEs = userSEs & localSEs
        # reordered user SEs, setting local first
        userSEs = list(topSEs) + list(userSEs - topSEs)
        localSEs = list(localSEs - topSEs)
        if len(userSEs) < 2 and localSEs:
            # Set a local SE first
            orderedSEs = localSEs[0:1] + userSEs + localSEs[1:]
        else:
            orderedSEs = userSEs + localSEs
        # FIX: random.shuffle(list(otherSEs)) shuffled a throwaway copy and then
        # appended the (unordered) set; shuffle a real list and append that.
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        orderedSEs += otherSEs
        return orderedSEs

    def _getCurrentOwner(self):
        """Simple function to return current DIRAC username.

        Prefers the 'OwnerName' workflow parameter; otherwise reads the proxy.
        :raises RuntimeError: when the proxy cannot be queried (unless the
                              module is disabled, in which case 'testUser')
        """
        if 'OwnerName' in self.workflow_commons:
            return self.workflow_commons['OwnerName']

        result = getProxyInfo()
        if not result['OK']:
            if not self._enableModule():
                return 'testUser'
            raise RuntimeError('Could not obtain proxy information')
        if 'username' not in result['Value']:
            raise RuntimeError('Could not get username from proxy')
        return result['Value']['username']
def execute(self):
    """Upload and register production output files (generator / SIM / REC / DST)
    with failover storage when the primary upload fails.

    Runs only at the last workflow step. Builds an LFN for each known output
    file, uploads via FailoverTransfer, reports uploaded LFNs as a job
    parameter, and stores the (possibly modified) request object.

    NOTE(review): legacy Python 2 code (``string.join``, ``dict.has_key``).
    Relies on attributes set elsewhere (generatorfile, mokkafile, slicfile,
    marlinfiles, lcsimfiles, attributesdict, basepath, destination,
    failoverSEs, userFileCatalog, failoverTest, lastStep) -- their exact
    contents are assumed, not visible here.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep==totalSteps:
        self.lastStep=True
    else:
        self.log.verbose('Current step = %s, total steps of workflow = %s, HandleProdOutputData will enable itself only at the last workflow step.' %(currentStep,totalSteps))
    # presumably self.lastStep defaults to False in __init__ -- TODO confirm
    if not self.lastStep:
        return S_OK()
    self.result =self.resolveInputVariables()
    if not self.result['OK']:
        self.log.error(self.result['Message'])
        return self.result
    ###Instantiate object that will ensure that the files are registered properly
    failoverTransfer = FailoverTransfer(self.request)
    # Map local file name -> {'lfn', 'type', 'workflowSE'} for each output to upload
    datatohandle = {}
    if self.generatorfile:
        if not os.path.exists(self.generatorfile):
            return S_ERROR("File %s does not exist, something went wrong before !"%(self.generatorfile))
        self.attributesdict['DataType'] = 'gen'
        lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                               self.attributesdict['DataType'],self.attributesdict['EvtType'],self.attributesdict['ProdID'],
                               self.generatorfile],"/")
        datatohandle[self.generatorfile]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
    if self.mokkafile or self.slicfile:
        recofile = ''
        if self.mokkafile and not os.path.exists(self.mokkafile):
            return S_ERROR("File %s does not exist, something went wrong before !"%(self.mokkafile))
        else:
            recofile = self.mokkafile
        if self.slicfile and not os.path.exists(self.slicfile):
            return S_ERROR("File %s does not exist, something went wrong before !"%(self.slicfile))
        else:
            recofile = self.slicfile
        self.attributesdict['DataType'] = 'SIM'
        # NOTE(review): 'type' stays 'gen' here even though DataType is 'SIM' -- confirm intended
        lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                               self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                               self.attributesdict['ProdID'],recofile],"/")
        datatohandle[recofile]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
    ##Below, look in file name if it contain REC or DST, to determine the data type.
    if self.marlinfiles:
        for file in self.marlinfiles:
            if file.find("REC")>-1:
                self.attributesdict['DataType'] = 'REC'
            if file.find("DST")>-1:
                self.attributesdict['DataType'] = 'DST'
            lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                   self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                                   self.attributesdict['ProdID'],file],"/")
            datatohandle[file]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
    if self.lcsimfiles:
        for file in self.lcsimfiles:
            if file.find("DST")>-1:
                self.attributesdict['DataType'] = 'DST'
            lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                   self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                                   self.attributesdict['ProdID'],file],"/")
            datatohandle[file]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
    result = self.getFileMetadata(datatohandle)
    if not result['OK']:
        self.setApplicationStatus(result['Message'])
        return S_OK()
    fileMetadata = result['Value']
    # Attach the destination SE to each file's metadata
    final = {}
    for fileName,metadata in fileMetadata.items():
        final[fileName]=metadata
        final[fileName]['resolvedSE']=self.destination
    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
        for fileName,metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                                                                                       string.join(metadata['resolvedSE'],', ')))
            result = failoverTransfer.transferAndRegisterFile(fileName,metadata['localpath'],metadata['lfn'],
                                                              metadata['resolvedSE'],fileGUID=metadata['guid'],
                                                              fileCatalog=self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' %(fileName,metadata))
                failover[fileName]=metadata
            else:
                #Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                replicateSE = ''
                if result['Value'].has_key('uploadedSE'):
                    uploadedSE = result['Value']['uploadedSE']
                    # Pick the first SE that is not the one already holding the file
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break
                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' %(lfn,replicateSE))
                    replication[lfn]=replicateSE
    else:
        # failoverTest mode: force every file down the failover path
        failover = final
    cleanUp = False
    for fileName,metadata in failover.items():
        # Randomize failover SE order so load spreads across the group
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE']=self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName,metadata['localpath'],metadata['lfn'],
                                                                  targetSE,metadata['resolvedSE'],fileGUID=metadata['guid'],
                                                                  fileCatalog=self.userFileCatalog)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' %(fileName,metadata))
            cleanUp = True
            continue #for users can continue even if one completely fails
        else:
            lfn = metadata['lfn']
            uploaded.append(lfn)
    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
        report = string.join( uploaded, ', ' )
        self.jobReport.setJobParameter( 'UploadedOutputData', report )
    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could Not Retrieve Modified Request')
    self.request = result['Value']
    #If some or all of the files failed to be saved to failover
    if cleanUp:
        self.workflow_commons['Request']=self.request
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')
    return S_OK()
def execute(self):
    """ Main execution function.

    Runs only at the last workflow step: glob-expands the user output data
    list, constructs user LFNs (owner and VO taken from the workflow commons
    or the proxy), uploads each file to the user-specified SEs with failover,
    then replicates uploaded files to a second SE via ReplicaManager.

    NOTE(review): legacy Python 2 code (``dict.has_key``); depends on
    attributes resolved elsewhere (userOutputData, userOutputSE,
    userOutputPath, ignoreapperrors, failoverTest, failoverSEs,
    userFileCatalog, enable, lastStep) -- contents assumed, not visible here.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
        self.lastStep = True
    else:
        self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself only \
at the last workflow step.' % (currentStep, totalSteps))
    if not self.lastStep:
        #Not last step, do nothing, proceed happily.
        return S_OK()
    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error("Failed to resolve input parameters:", result['Message'])
        return result
    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        ##Something went wrong in the step or the workflow, do nothing.
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                     self.stepStatus['OK']))
        return S_OK('No output data upload attempted')
    self.request.RequestName = 'job_%d_request.xml' % int(self.jobID)
    self.request.JobID = self.jobID
    self.request.SourceComponent = "Job_%d" % int(self.jobID)
    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = []
    possible_files= []
    for i in self.userOutputData:
        # Each entry may be a glob pattern; expand it on the local disk
        files = getGlobbedFiles(i)
        for possible_file in files:
            if possible_file in possible_files:
                #Don't have twice the same file
                continue
            outputList.append({'outputDataType' : i.split('.')[-1].upper(),#this would be used to sort the files in different dirs
                               'outputDataSE' : self.userOutputSE,
                               'outputFile' : os.path.basename(possible_file)})
            possible_files.append(os.path.basename(possible_file))
    self.log.info('Constructing user output LFN(s) for %s' % (', '.join(self.userOutputData)))
    if not self.jobID:
        # Local/test runs have no WMS job ID; use a placeholder
        self.jobID = 12345
    owner = ''
    if 'Owner' in self.workflow_commons:
        owner = self.workflow_commons['Owner']
    else:
        res = getCurrentOwner()
        if not res['OK']:
            self.log.error('Could not find proxy')
            return S_ERROR('Could not obtain owner from proxy')
        owner = res['Value']
    vo = ''
    if self.workflow_commons.has_key('VO'):
        vo = self.workflow_commons['VO']
    else:
        res = getVOfromProxyGroup()
        if not res['OK']:
            self.log.error('Failed finding the VO')
            return S_ERROR('Could not obtain VO from proxy')
        vo = res['Value']
    result = constructUserLFNs(int(self.jobID), vo, owner, possible_files, self.userOutputPath)
    if not result['OK']:
        self.log.error('Could not create user LFNs', result['Message'])
        return result
    userOutputLFNs = result['Value']
    self.log.verbose('Calling getCandidateFiles( %s, %s)' % (outputList, userOutputLFNs))
    result = self.getCandidateFiles(outputList, userOutputLFNs)
    if not result['OK']:
        # Application errors are swallowed (S_OK) unless ignoreapperrors is unset
        if not self.ignoreapperrors:
            self.log.error(result['Message'])
            self.setApplicationStatus(result['Message'])
            return S_OK()
    fileDict = result['Value']
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        if not self.ignoreapperrors:
            self.log.error(result['Message'])
            self.setApplicationStatus(result['Message'])
            return S_OK()
    if not result['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = result['Value']
    # Target SEs are exactly the user-specified ones (no local-SE reordering here)
    orderedSEs = self.userOutputSE
    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.items():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs
    #At this point can exit and see exactly what the module will upload
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted \
to upload the following files %s' % ', '.join(final.keys()))
        for fileName, metadata in final.items():
            self.log.info('--------%s--------' % fileName)
            for n, v in metadata.items():
                self.log.info('%s = %s' %(n, v))
        return S_OK('Module is disabled by control flag')
    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)
    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
        for fileName, metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                                                                                       ', '.join(metadata['resolvedSE'])))
            replicateSE = ''
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                              metadata['resolvedSE'], fileMetaDict = metadata,
                                                              fileCatalog = self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                failover[fileName] = metadata
            else:
                #Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                if result['Value'].has_key('uploadedSE'):
                    uploadedSE = result['Value']['uploadedSE']
                    # First SE that differs from the one already used
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break
                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
                    replication[lfn] = replicateSE
    else:
        # failoverTest mode: force every file down the failover path
        failover = final
    cleanUp = False
    for fileName, metadata in failover.items():
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE'] = self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'], metadata['lfn'],
                                                                  targetSE, self.failoverSEs, fileMetaDict = metadata,
                                                                  fileCatalog = self.userFileCatalog)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
            cleanUp = True
            continue #for users can continue even if one completely fails
        else:
            lfn = metadata['lfn']
            uploaded.append(lfn)
    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
        report = ', '.join( uploaded )
        self.jobReport.setJobParameter( 'UploadedOutputData', report )
    self.request = failoverTransfer.request
    #If some or all of the files failed to be saved to failover
    if cleanUp:
        self.workflow_commons['Request'] = self.request
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')
    #If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
        result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
        if not result['OK']:
            # Best-effort: a failed replication is logged but not fatal
            self.log.info('Replication failed with below error but file already exists in Grid storage with \
at least one replica:\n%s' % (result))
    self.workflow_commons['Request'] = self.request
    self.generateFailoverFile()
    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
def execute(self):
    """ Main execution function.

    Runs only at the last workflow step: builds the output file list and user
    LFNs, orders the candidate SEs (site-local first, user SEs prepended,
    remaining defaults randomized), uploads with failover, replicates to a
    second SE, and finally writes any pending request out as an XML file.

    NOTE(review): legacy Python 2 code (``string`` module functions,
    ``dict.has_key``); depends on attributes resolved elsewhere
    (userOutputData, userOutputSE, userOutputPath, outputDataFileMask,
    defaultOutputSE, failoverSEs, userFileCatalog, ignoreapperrors,
    failoverTest, enable, lastStep, jobReport) -- contents assumed.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
        self.lastStep = True
    else:
        self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself only \
at the last workflow step.' % (currentStep, totalSteps))
    if not self.lastStep:
        return S_OK()
    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error(result['Message'])
        return result
    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        # Something went wrong upstream; skip the upload entirely
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                     self.stepStatus['OK']))
        return S_OK('No output data upload attempted')
    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = []
    for i in self.userOutputData:
        outputList.append({'outputPath' : string.upper(string.split(i, '.')[-1]),
                           'outputDataSE' : self.userOutputSE,
                           'outputFile' : os.path.basename(i)})
    userOutputLFNs = []
    if self.userOutputData:
        self.log.info('Constructing user output LFN(s) for %s' % (string.join(self.userOutputData, ', ')))
        if not self.jobID:
            # Local/test runs have no WMS job ID; use a placeholder
            self.jobID = 12345
        owner = ''
        if self.workflow_commons.has_key('Owner'):
            owner = self.workflow_commons['Owner']
        else:
            res = self.getCurrentOwner()
            if not res['OK']:
                return S_ERROR('Could not obtain owner from proxy')
            owner = res['Value']
        vo = ''
        if self.workflow_commons.has_key('VO'):
            vo = self.workflow_commons['VO']
        else:
            res = self.getCurrentVO()
            if not res['OK']:
                return S_ERROR('Could not obtain VO from proxy')
            vo = res['Value']
        result = constructUserLFNs(int(self.jobID), vo, owner, self.userOutputData, self.userOutputPath)
        if not result['OK']:
            self.log.error('Could not create user LFNs', result['Message'])
            return result
        userOutputLFNs = result['Value']
    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs,
                                                                 self.outputDataFileMask))
    result = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
        # Application errors are swallowed (S_OK) unless ignoreapperrors is unset
        if not self.ignoreapperrors:
            self.setApplicationStatus(result['Message'])
            return S_OK()
    fileDict = result['Value']
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        if not self.ignoreapperrors:
            self.setApplicationStatus(result['Message'])
            return S_OK()
    if not result['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = result['Value']
    #First get the local (or assigned) SE to try first for upload and others in random fashion
    result = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not result['OK']:
        self.log.error('Could not resolve output data SE', result['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return result
    localSE = result['Value']
    self.log.verbose('Site Local SE for user outputs is: %s' % (localSE))
    # NOTE(review): this mutates self.defaultOutputSE in place (no copy) -- confirm intended
    orderedSEs = self.defaultOutputSE
    for se in localSE:
        if se in orderedSEs:
            orderedSEs.remove(se)
    for se in self.userOutputSE:
        if se in orderedSEs:
            orderedSEs.remove(se)
    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
        # User-requested SEs go first (unless already present in the list)
        prependSEs = []
        for userSE in self.userOutputSE:
            if not userSE in orderedSEs:
                prependSEs.append(userSE)
        orderedSEs = prependSEs + orderedSEs
    self.log.info('Ordered list of output SEs is: %s' % (string.join(orderedSEs, ', ')))
    final = {}
    for fileName, metadata in fileMetadata.items():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs
    #At this point can exit and see exactly what the module will upload
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted \
to upload the following files %s' % string.join(final.keys(), ', '))
        for fileName, metadata in final.items():
            self.log.info('--------%s--------' % fileName)
            for n, v in metadata.items():
                self.log.info('%s = %s' %(n, v))
        return S_OK('Module is disabled by control flag')
    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)
    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
        for fileName, metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                                                                                       string.join(metadata['resolvedSE'], ', ')))
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                              metadata['resolvedSE'], fileGUID = metadata['guid'],
                                                              fileCatalog = self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                failover[fileName] = metadata
            else:
                #Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                replicateSE = ''
                if result['Value'].has_key('uploadedSE'):
                    uploadedSE = result['Value']['uploadedSE']
                    # First SE that differs from the one already used
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break
                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
                    replication[lfn] = replicateSE
    else:
        # failoverTest mode: force every file down the failover path
        failover = final
    cleanUp = False
    for fileName, metadata in failover.items():
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE'] = self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'], metadata['lfn'],
                                                                  targetSE, metadata['resolvedSE'], fileGUID = metadata['guid'],
                                                                  fileCatalog = self.userFileCatalog)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
            cleanUp = True
            continue #for users can continue even if one completely fails
        else:
            lfn = metadata['lfn']
            uploaded.append(lfn)
    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
        report = string.join( uploaded, ', ' )
        self.jobReport.setJobParameter( 'UploadedOutputData', report )
    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could Not Retrieve Modified Request')
    self.request = result['Value']
    #If some or all of the files failed to be saved to failover
    if cleanUp:
        self.workflow_commons['Request'] = self.request
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')
    #If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
        result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
        if not result['OK']:
            # Best-effort: a failed replication is logged but not fatal
            self.log.info('Replication failed with below error but file already exists in Grid storage with \
at least one replica:\n%s' % (result))
    self.workflow_commons['Request'] = self.request
    #Now must ensure if any pending requests are generated that these are propagated to the job wrapper
    reportRequest = None
    if self.jobReport:
        result = self.jobReport.generateRequest()
        if not result['OK']:
            self.log.warn('Could not generate request for job report with result:\n%s' % (result))
        else:
            reportRequest = result['Value']
    if reportRequest:
        self.log.info('Populating request with job report information')
        self.request.update(reportRequest)
    if not self.request.isEmpty()['Value']:
        request_string = self.request.toXML()['Value']
        # Write out the request string
        fname = 'user_job_%s_request.xml' % (self.jobID)
        xmlfile = open(fname, 'w')
        xmlfile.write(request_string)
        xmlfile.close()
        self.log.info('Creating failover request for deferred operations for job %s:' % self.jobID)
        result = self.request.getDigest()
        if result['OK']:
            digest = result['Value']
            self.log.info(digest)
    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
class UploadOutputData(ModuleBase):
    """Module to upload specified job output files according to the parameters
    defined in the production workflow.
    """

    #############################################################################

    def __init__(self, bkClient=None, dm=None):
        """Module initialization.

        :param bkClient: optional Bookkeeping client (ModuleBase creates a default one when None)
        :param dm: optional DataManager (ModuleBase creates a default one when None)
        """
        self.log = gLogger.getSubLogger("UploadOutputData")
        super(UploadOutputData, self).__init__(self.log, bkClientIn=bkClient, dm=dm)

        self.version = __RCSID__
        self.commandTimeOut = 10 * 60
        self.jobID = ''
        self.existingCatalogs = []
        result = gConfig.getSections('/Resources/FileCatalogs')
        if result['OK']:
            self.existingCatalogs = result['Value']

        # List all parameters here
        self.outputDataFileMask = ''
        self.outputMode = 'Any'  # or 'Run', for Reco/Stripping case, or 'Local', e.g. for MCMerge
        self.outputList = []
        self.outputDataStep = ''
        self.request = None
        self.failoverTransfer = None
        self.prodOutputLFNs = []
        self.failoverSEs = None

    #############################################################################

    def _resolveInputVariables(self):
        """By convention the module parameters are resolved here."""
        super(UploadOutputData, self)._resolveInputVariables()

        if 'outputDataStep' in self.workflow_commons:
            self.outputDataStep = [str(ds) for ds in self.workflow_commons['outputDataStep'].split(';')]

        if 'outputList' in self.workflow_commons:
            self.outputList = self.workflow_commons['outputList']

        if 'outputMode' in self.workflow_commons:
            self.outputMode = self.workflow_commons['outputMode']

        # Use LHCb utility for local running via jobexec
        if 'ProductionOutputData' in self.workflow_commons:
            self.prodOutputLFNs = self.workflow_commons['ProductionOutputData']
            if isinstance(self.prodOutputLFNs, basestring):
                self.prodOutputLFNs = [i.strip() for i in self.prodOutputLFNs.split(';')]  # pylint: disable=no-member
        else:
            self.log.info("ProductionOutputData parameter not found, creating on the fly")
            result = constructProductionLFNs(self.workflow_commons, self.bkClient)
            if not result['OK']:
                self.log.error("Could not create production LFNs", result['Message'])
                return result
            self.prodOutputLFNs = result['Value']['ProductionOutputData']

    #############################################################################

    def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
                workflowStatus=None, stepStatus=None,
                wf_commons=None, step_commons=None,
                step_number=None, step_id=None,
                SEs=None, fileDescendants=None):
        """Main execution function.

        1. Determine the final list of possible output files for the workflow
           and all the parameters needed to upload them.
        2. Verifying that the input files have no descendants (and exiting with error, otherwise)
        3. Sending the BK records for the steps of the job
        4. Transfer output files in their destination, register in the FC (with failover)
        5. Registering the output files in the Bookkeeping
        """
        try:
            super(UploadOutputData, self).execute(self.version, production_id, prod_job_id, wms_job_id,
                                                  workflowStatus, stepStatus,
                                                  wf_commons, step_commons, step_number, step_id)

            # This returns all Tier1-Failover unless a specific one is defined for the site
            self.failoverSEs = getDestinationSEList('Tier1-Failover', self.siteName, outputmode='Any')
            random.shuffle(self.failoverSEs)

            self._resolveInputVariables()

            if not self._checkWFAndStepStatus():
                return S_OK("Failures detected in previous steps: no output data upload attempted")

            # ## 1. Determine the final list of possible output files
            # ##    for the workflow and all the parameters needed to upload them.

            self.log.verbose("Getting the list of candidate files")
            fileDict = self.getCandidateFiles(self.outputList, self.prodOutputLFNs,
                                              self.outputDataFileMask, self.outputDataStep)

            fileMetadata = self.getFileMetadata(fileDict)
            if not fileMetadata:
                self.log.info("No output data files were determined to be uploaded for this workflow")
                return S_OK()

            # Get final, resolved SE list for files
            final = {}
            for fileName, metadata in fileMetadata.iteritems():
                if not SEs:
                    resolvedSE = getDestinationSEList(metadata['workflowSE'], self.siteName,
                                                      self.outputMode, self.workflow_commons.get('runNumber'))
                else:
                    resolvedSE = SEs
                final[fileName] = metadata
                final[fileName]['resolvedSE'] = resolvedSE

            self.log.info("The following files will be uploaded", ": %s" % (', '.join(final.keys())))
            for fileName, metadata in final.items():
                self.log.info('--------%s--------' % fileName)
                for name, val in metadata.iteritems():
                    self.log.info('%s = %s' % (name, val))

            if not self._enableModule():
                # At this point can exit and see exactly what the module would have uploaded
                self.log.info("Module disabled",
                              "would have attempted to upload the files %s" % ', '.join(final.keys()))

            # ## 2. Prior to uploading any files must check (for productions with input data)
            # ##    that no descendant files already exist with replica flag in the BK.

            if self.inputDataList:
                if fileDescendants is not None:
                    lfnsWithDescendants = fileDescendants
                else:
                    if not self._enableModule():
                        self.log.info("Module disabled",
                                      "would have attempted to check the files %s" % ', '.join(self.inputDataList))
                        lfnsWithDescendants = []
                    else:
                        lfnsWithDescendants = getFileDescendants(self.production_id, self.inputDataList,
                                                                 dm=self.dataManager, bkClient=self.bkClient)
                if not lfnsWithDescendants:
                    self.log.info("No descendants found, outputs can be uploaded")
                else:
                    self.log.error("Found descendants!!! Outputs won't be uploaded")
                    # FIX: the original code implicitly concatenated the adjacent string
                    # literals (": %s" ' % ') and called .join() on the result, which
                    # garbled the log message instead of formatting the LFN list.
                    self.log.info("Files with descendants", ": %s" % ', '.join(lfnsWithDescendants))
                    self.log.info("The files above will be set as 'Processed', other lfns in input will be later reset as Unused")
                    self.fileReport.setFileStatus(int(self.production_id), lfnsWithDescendants, 'Processed')
                    return S_ERROR("Input Data Already Processed")

            # ## 3. Sending the BK records for the steps of the job

            bkFileExtensions = ['bookkeeping*.xml']
            bkFiles = []
            for ext in bkFileExtensions:
                self.log.debug("Looking at BK record wildcard: %s" % ext)
                globList = glob.glob(ext)
                for check in globList:
                    if os.path.isfile(check):
                        self.log.verbose("Found locally existing BK file record", ": %s" % check)
                        bkFiles.append(check)

            # Unfortunately we depend on the file names to order the BK records
            bkFilesListTuples = []
            for bk in bkFiles:
                bkFilesListTuples.append((bk, int(bk.split('_')[-1].split('.')[0])))
            bkFiles = [bk[0] for bk in sorted(bkFilesListTuples, key=itemgetter(1))]

            self.log.info("The following BK records will be sent", ": %s" % (', '.join(bkFiles)))
            if self._enableModule():
                for bkFile in bkFiles:
                    with open(bkFile, 'r') as fd:
                        bkXML = fd.read()
                    self.log.info("Sending BK record", ":\n%s" % (bkXML))
                    result = self.bkClient.sendXMLBookkeepingReport(bkXML)
                    self.log.verbose(result)
                    if result['OK']:
                        self.log.info("Bookkeeping report sent", "for %s" % bkFile)
                    else:
                        self.log.error("Could not send Bookkeeping XML file to server", ": %s" % result['Message'])
                        self.log.info("Preparing DISET request", "for %s" % bkFile)
                        bkDISETReq = Operation()
                        bkDISETReq.Type = 'ForwardDISET'
                        bkDISETReq.Arguments = DEncode.encode(result['rpcStub'])
                        self.request.addOperation(bkDISETReq)
                        self.workflow_commons['Request'] = self.request  # update each time, just in case
            else:
                self.log.info("Would have attempted to send bk records, but module is disabled")

            # ## 4. Transfer output files in their destination, register in the FC (with failover)

            # Disable the watchdog check in case the file uploading takes a long time
            self._disableWatchdogCPUCheck()

            # Instantiate the failover transfer client with the global request object
            if not self.failoverTransfer:
                self.failoverTransfer = FailoverTransfer(self.request)

            # Track which files are successfully uploaded (not to failover) via
            performBKRegistration = []
            # Failover replicas are always added to the BK when they become available
            # (actually, added to all the catalogs)
            failover = {}

            for fileName, metadata in final.items():
                targetSE = metadata['resolvedSE']
                self.log.info("Attempting to store file to SE",
                              "%s to the following SE(s):\n%s" % (fileName, ', '.join(targetSE)))
                fileMetaDict = {'Size': metadata['filedict']['Size'],
                                'LFN': metadata['filedict']['LFN'],
                                'GUID': metadata['filedict']['GUID'],
                                'Checksum': metadata['filedict']['Checksum'],
                                'ChecksumType': metadata['filedict']['ChecksumType']}

                if not self._enableModule():
                    # At this point can exit and see exactly what the module would have uploaded
                    self.log.info("Module disabled", "would have attempted to upload file %s" % fileName)
                    continue

                result = self.failoverTransfer.transferAndRegisterFile(fileName=fileName,
                                                                       localPath=metadata['localpath'],
                                                                       lfn=metadata['filedict']['LFN'],
                                                                       destinationSEList=targetSE,
                                                                       fileMetaDict=fileMetaDict,
                                                                       masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error("Could not transfer and register",
                                   " %s with metadata:\n %s" % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    self.log.info("File uploaded, will be registered in BK if all files uploaded for job",
                                  "(%s)" % fileName)
                    # if the files are uploaded in the SE, independently if the registration
                    # in the FC is done, then we have to register all of them in the BKK
                    performBKRegistration.append(metadata)

            cleanUp = False
            for fileName, metadata in failover.items():
                self.log.info("Setting default catalog for failover transfer registration to master catalog")
                random.shuffle(self.failoverSEs)
                targetSE = metadata['resolvedSE'][0]
                metadata['resolvedSE'] = self.failoverSEs

                fileMetaDict = {'Size': metadata['filedict']['Size'],
                                'LFN': metadata['filedict']['LFN'],
                                'GUID': metadata['filedict']['GUID'],
                                'Checksum': metadata['filedict']['Checksum'],
                                'ChecksumType': metadata['filedict']['ChecksumType']}

                if not self._enableModule():
                    # At this point can exit and see exactly what the module would have uploaded
                    self.log.info("Module disabled",
                                  "would have attempted to upload with failover file %s" % fileName)
                    continue

                result = self.failoverTransfer.transferAndRegisterFileFailover(fileName=fileName,
                                                                               localPath=metadata['localpath'],
                                                                               lfn=metadata['filedict']['LFN'],
                                                                               targetSE=targetSE,
                                                                               failoverSEList=metadata['resolvedSE'],
                                                                               fileMetaDict=fileMetaDict,
                                                                               masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error("Could not transfer and register",
                                   "%s in failover with metadata:\n %s" % (fileName, metadata))
                    cleanUp = True
                    break  # no point continuing if one completely fails

            # Now after all operations, retrieve potentially modified request object
            self.request = self.failoverTransfer.request

            # If some or all of the files failed to be saved even to failover
            if cleanUp and self._enableModule():
                self._cleanUp(final)
                self.workflow_commons['Request'] = self.request
                return S_ERROR('Failed to upload output data')

            # For files correctly uploaded must report LFNs to job parameters
            if final and self._enableModule():
                report = ', '.join(final.keys())
                self.setJobParameter('UploadedOutputData', report)

            # ## 5. Can now register the successfully uploaded files in the BK,
            # ##    i.e. set the BK replica flags

            if not performBKRegistration:
                self.log.info("There are no files to perform the BK registration for, all are in failover")
            elif self._enableModule():  # performing BK registration
                # Getting what should be registered immediately, and what later
                lfnsToRegisterInBK = set([metadata['filedict']['LFN'] for metadata in performBKRegistration])
                lfnsToRegisterInBKNow = self._getLFNsForBKRegistration(lfnsToRegisterInBK)
                lfnsToRegisterInBKLater = list(lfnsToRegisterInBK - set(lfnsToRegisterInBKNow))

                # Registering what should be registered immediately, and handling failures
                result = FileCatalog(catalogs=['BookkeepingDB']).addFile(lfnsToRegisterInBKNow)
                self.log.verbose("BookkeepingDB.addFile: %s" % result)
                if not result['OK']:
                    self.log.error(result)
                    return S_ERROR("Could Not Perform BK Registration")
                if 'Failed' in result['Value'] and result['Value']['Failed']:
                    for lfn, error in result['Value']['Failed'].iteritems():
                        lfnMetadata = {}
                        for lfnMD in performBKRegistration:
                            # the lfn is indeed both at lfnMD['lfn'] and at lfnMD['filedict']['LFN']
                            if lfnMD['lfn'] == lfn:
                                lfnMetadata = lfnMD['filedict']
                                break
                        self.setBKRegistrationRequest(lfn, error=error, metaData=lfnMetadata)

                # Adding a registration request for what should be registered later
                if lfnsToRegisterInBKLater:
                    for lfnMD in performBKRegistration:
                        if lfnMD['lfn'] in lfnsToRegisterInBKLater:
                            lfnMetadata = lfnMD['filedict']
                            self.setBKRegistrationRequest(lfnMD['lfn'], metaData=lfnMetadata)

            self.workflow_commons['Request'] = self.request

            return S_OK("Output data uploaded")

        except Exception as e:  # pylint:disable=broad-except
            self.log.exception('Exception in UploadOutputData', lException=e)
            self.setApplicationStatus(repr(e))
            return S_ERROR(str(e))

        finally:
            super(UploadOutputData, self).finalize(self.version)

    #############################################################################

    def _getLFNsForBKRegistration(self, lfns):
        """Check what should be registered immediately in the BK, and what later.

        If there's a request in self.request for registering the file in the FC,
        don't perform the registration in the BK immediately: in this case the
        file should be registered with an operation.

        :param list lfns: an iterable of LFNs
        :return: list of LFNs to be registered immediately
        """
        postPonePerformBKRegistration = []
        for op in self.request:
            if op.Type == 'RegisterFile':
                # We assume that this is a registerFile operation for the DFC... what else?
                for fileInOp in op:
                    if fileInOp.LFN in lfns:
                        postPonePerformBKRegistration.append(fileInOp.LFN)
        return list(set(lfns) - set(postPonePerformBKRegistration))

    def _cleanUp(self, final):
        """Clean up uploaded data for the LFNs in the list."""
        lfnList = []
        for _fileName, metadata in final.items():
            lfnList.append(metadata['lfn'])

        # Drop any pending operations that reference the LFNs being cleaned up
        self.log.verbose("Cleaning up the request, for LFNs: %s" % ', '.join(lfnList))
        newRequest = Request()
        for op in self.request:
            add = True
            if op.Type in ['PutAndRegister', 'ReplicateAndRegister',
                           'RegisterFile', 'RegisterReplica', 'RemoveReplica']:
                for files in op:
                    if files.LFN in lfnList:
                        add = False
            if add:
                newRequest.addOperation(op)
        self.request = newRequest

        self.log.verbose("And adding RemoveFile operation for LFNs: %s, just in case" % ', '.join(lfnList))
        removeFiles = Operation()
        removeFiles.Type = 'RemoveFile'
        for lfn in lfnList:
            removedFile = File()
            removedFile.LFN = lfn
            removeFiles.addFile(removedFile)
        self.request.addOperation(removeFiles)
def execute(self):
    """Main execution function.

    Resolves the module parameters, determines the experiment from the first
    output LFN, builds the final file/SE mapping and uploads each output file,
    falling back to the failover SEs when the primary upload fails.

    :return: S_OK / S_ERROR structure
    """
    self.log.info('Initializing %s' % self.version)
    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error("Failed to resolve input parameters:", result['Message'])
        return result

    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                     self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    # Determine the experiment from the path of the first output LFN
    example_file = self.prodOutputLFNs[0]
    if "/ilc/prod/clic" in example_file:
        self.experiment = "CLIC"
    elif "/ilc/prod/ilc/sid" in example_file:
        self.experiment = 'ILC_SID'
    elif "/ilc/prod/ilc/mc-dbd" in example_file:
        self.experiment = 'ILC_ILD'
    else:
        self.log.warn("Failed to determine experiment, reverting to default")

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    result = self.getCandidateFiles(self.outputList, self.prodOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return result
    fileDict = result['Value']

    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        self.log.error(result['Message'])
        self.setApplicationStatus(result['Message'])
        return result
    if not result['Value']:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        return S_OK()
    fileMetadata = result['Value']

    # Get final, resolved SE list for files
    final = {}
    for fileName, metadata in fileMetadata.items():
        result = getDestinationSEList(metadata['workflowSE'], DIRAC.siteName(), self.outputMode)
        if not result['OK']:
            self.log.error('Could not resolve output data SE', result['Message'])
            self.setApplicationStatus('Failed To Resolve OutputSE')
            return result
        resolvedSE = result['Value']
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = resolvedSE

    self.log.info('The following files will be uploaded: %s' % (', '.join(final.keys())))
    for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for metaName, metaValue in metadata.items():
            self.log.info('%s = %s' % (metaName, metaValue))

    # At this point can exit and see exactly what the module would have uploaded
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted to upload the \
following files %s' % ', '.join(final.keys()))
        return S_OK('Module is disabled by control flag')

    # Disable the watchdog check in case the file uploading takes a long time
    self.log.info('Creating DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK in order to disable the Watchdog prior to upload')
    with open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') as fopen:
        fopen.write('%s' % time.asctime())

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())
    catalogs = self.ops.getValue('Production/%s/Catalogs' % self.experiment,
                                 ['FileCatalog', 'LcgFileCatalog'])

    # One by one upload the files with failover if necessary
    failover = {}
    if not self.failoverTest:
        for fileName, metadata in final.iteritems():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s"
                          % (fileName, ', '.join(metadata['resolvedSE'])))
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'],
                                                              metadata['lfn'], metadata['resolvedSE'],
                                                              fileMetaDict=metadata['filedict'],
                                                              fileCatalog=catalogs)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s'
                               % (fileName, metadata['filedict']))
                failover[fileName] = metadata
    else:
        failover = final

    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)

    cleanUp = False
    for fileName, metadata in failover.iteritems():
        self.log.info('Setting default catalog for failover transfer to FileCatalog')
        # FIX: operate on a copy of the failover SE list. The original aliased
        # self.failoverSEs, so remove()/shuffle() mutated the shared list and
        # the mutation leaked into every subsequent iteration.
        failovers = list(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        try:  # remove duplicate site, otherwise it will do nasty things where processing the request
            failovers.remove(targetSE)
        except ValueError:
            pass
        random.shuffle(failovers)
        metadata['resolvedSE'] = failovers
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                  metadata['lfn'], targetSE,
                                                                  metadata['resolvedSE'],
                                                                  fileMetaDict=metadata['filedict'],
                                                                  fileCatalog=catalogs)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s'
                           % (fileName, metadata['filedict']))
            cleanUp = True
            break  # no point continuing if one completely fails

    os.remove("DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK")  # cleanup the mess

    self.workflow_commons['Request'] = failoverTransfer.request

    # If some or all of the files failed to be saved to failover
    if cleanUp:
        lfns = [metadata['lfn'] for metadata in final.values()]
        result = self._cleanUp(lfns)
        return S_ERROR('Failed to upload output data')
    return S_OK('Output data uploaded')
def execute(self):
    """Main execution function.

    Resolves the module parameters, determines the experiment from the first
    output LFN, builds the final file/SE mapping, uploads each output file
    with failover, and stores the (possibly modified) request object back
    into the workflow commons.

    :return: S_OK / S_ERROR structure
    """
    self.log.info('Initializing %s' % self.version)
    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error(result['Message'])
        return result

    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                     self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    # Determine the experiment from the path of the first output LFN
    example_file = self.prodOutputLFNs[0]
    if "/ilc/prod/clic" in example_file:
        self.experiment = "CLIC"
    elif "/ilc/prod/ilc/sid" in example_file:
        self.experiment = 'ILC_SID'
    elif "/ilc/prod/ilc/mc-dbd" in example_file:
        self.experiment = 'ILC_ILD'
    else:
        self.log.warn("Failed to determine experiment, reverting to default")

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    result = self.getCandidateFiles(self.outputList, self.prodOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
        self.setApplicationStatus(result['Message'])
        return result
    fileDict = result['Value']

    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        self.setApplicationStatus(result['Message'])
        return result
    if not result['Value']:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        return S_OK()
    fileMetadata = result['Value']

    # Get final, resolved SE list for files
    final = {}
    for fileName, metadata in fileMetadata.items():
        result = getDestinationSEList(metadata['workflowSE'], DIRAC.siteName(), self.outputMode)
        if not result['OK']:
            self.log.error('Could not resolve output data SE', result['Message'])
            self.setApplicationStatus('Failed To Resolve OutputSE')
            return result
        resolvedSE = result['Value']
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = resolvedSE

    self.log.info('The following files will be uploaded: %s' % ', '.join(final.keys()))
    for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for n, v in metadata.items():
            self.log.info('%s = %s' % (n, v))

    # At this point can exit and see exactly what the module would have uploaded
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted to upload the \
following files %s' % ', '.join(final.keys()))
        return S_OK('Module is disabled by control flag')

    # Disable the watchdog check in case the file uploading takes a long time
    self.log.info('Creating DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK in order to disable the Watchdog prior to upload')
    with open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') as fopen:
        fopen.write('%s' % time.asctime())

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)
    catalogs = ['FileCatalog', 'LcgFileCatalog']

    # One by one upload the files with failover if necessary
    failover = {}
    if not self.failoverTest:
        for fileName, metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s"
                          % (fileName, ', '.join(metadata['resolvedSE'])))
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'],
                                                              metadata['lfn'], metadata['resolvedSE'],
                                                              fileGUID=metadata['guid'],
                                                              fileCatalog=catalogs)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s'
                               % (fileName, metadata))
                failover[fileName] = metadata
    else:
        failover = final

    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)

    cleanUp = False
    for fileName, metadata in failover.items():
        self.log.info('Setting default catalog for failover transfer to FileCatalog')
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE'] = self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                  metadata['lfn'], targetSE,
                                                                  metadata['resolvedSE'],
                                                                  fileGUID=metadata['guid'],
                                                                  fileCatalog=catalogs)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s'
                           % (fileName, metadata))
            cleanUp = True
            break  # no point continuing if one completely fails

    os.remove("DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK")  # cleanup the mess

    # Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could not retrieve modified request')
    self.request = result['Value']

    # If some or all of the files failed to be saved to failover
    if cleanUp:
        lfns = [metadata['lfn'] for metadata in final.values()]
        result = self.__cleanUp(lfns)
        self.workflow_commons['Request'] = self.request
        return S_ERROR('Failed to upload output data')

    self.workflow_commons['Request'] = self.request
    return S_OK('Output data uploaded')
def execute(self):
    """Main execution function.

    Runs only on the last workflow step. Builds the list of user output
    files matching the requested patterns, constructs their user LFNs,
    uploads them to the requested SEs (with failover), optionally triggers
    replication to a second SE, and stores any pending request operations
    in the workflow commons for the job wrapper.

    :return: S_OK / S_ERROR structure
    """
    # Have to work out if the module is part of the last step i.e.
    # user jobs can have any number of steps and we only want
    # to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
        self.lastStep = True
    else:
        self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself only \
at the last workflow step.' % (currentStep, totalSteps))
    if not self.lastStep:
        # Not last step, do nothing, proceed happily.
        return S_OK()

    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error("Failed to resolve input parameters:", result['Message'])
        return result

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        # Something went wrong in the step or the workflow, do nothing.
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                     self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    # NOTE(review): int(self.jobID) is evaluated here, before the jobID fallback
    # below — assumes self.jobID is already an int-convertible value at this
    # point (e.g. 0 when running locally); confirm against ModuleBase defaults.
    self.request.RequestName = 'job_%d_request.xml' % int(self.jobID)
    self.request.JobID = self.jobID
    self.request.SourceComponent = "Job_%d" % int(self.jobID)

    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    outputList = []
    possible_files = []
    for i in self.userOutputData:
        files = getGlobbedFiles(i)
        for possible_file in files:
            if possible_file in possible_files:
                # Don't have twice the same file
                continue
            outputList.append({'outputDataType': i.split('.')[-1].upper(),  # this would be used to sort the files in different dirs
                               'outputDataSE': self.userOutputSE,
                               'outputFile': os.path.basename(possible_file)})
            possible_files.append(os.path.basename(possible_file))

    self.log.info('Constructing user output LFN(s) for %s' % (', '.join(self.userOutputData)))
    if not self.jobID:
        # Placeholder job ID so LFN construction works when running locally
        self.jobID = 12345
    owner = ''
    if 'Owner' in self.workflow_commons:
        owner = self.workflow_commons['Owner']
    else:
        res = getCurrentOwner()
        if not res['OK']:
            self.log.error('Could not find proxy')
            return S_ERROR('Could not obtain owner from proxy')
        owner = res['Value']
    vo = ''
    if self.workflow_commons.has_key('VO'):
        vo = self.workflow_commons['VO']
    else:
        res = getVOfromProxyGroup()
        if not res['OK']:
            self.log.error('Failed finding the VO')
            return S_ERROR('Could not obtain VO from proxy')
        vo = res['Value']
    result = constructUserLFNs(int(self.jobID), vo, owner, possible_files, self.userOutputPath)
    if not result['OK']:
        self.log.error('Could not create user LFNs', result['Message'])
        return result
    userOutputLFNs = result['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s)' % (outputList, userOutputLFNs))
    result = self.getCandidateFiles(outputList, userOutputLFNs)
    if not result['OK']:
        # Application errors may be tolerated via the ignoreapperrors flag
        if not self.ignoreapperrors:
            self.log.error(result['Message'])
            self.setApplicationStatus(result['Message'])
            return S_OK()
    fileDict = result['Value']

    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        if not self.ignoreapperrors:
            self.log.error(result['Message'])
            self.setApplicationStatus(result['Message'])
            return S_OK()

    if not result['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = result['Value']

    orderedSEs = self.userOutputSE

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.items():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs

    # At this point can exit and see exactly what the module will upload
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted \
to upload the following files %s' % ', '.join(final.keys()))
        for fileName, metadata in final.items():
            self.log.info('--------%s--------' % fileName)
            for n, v in metadata.items():
                self.log.info('%s = %s' % (n, v))
        return S_OK('Module is disabled by control flag')

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    # One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
        for fileName, metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                          ', '.join(metadata['resolvedSE'])))
            replicateSE = ''
            # NOTE(review): fileMetaDict is given the full metadata dict here;
            # sibling modules pass metadata['filedict'] — confirm FailoverTransfer
            # accepts the full dict.
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'],
                                                              metadata['lfn'], metadata['resolvedSE'],
                                                              fileMetaDict=metadata,
                                                              fileCatalog=self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                failover[fileName] = metadata
            else:
                # Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                # Pick the first SE that is not the one actually used for the
                # upload as the replication target
                if result['Value'].has_key('uploadedSE'):
                    uploadedSE = result['Value']['uploadedSE']
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break
                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
                    replication[lfn] = replicateSE
    else:
        failover = final

    cleanUp = False
    for fileName, metadata in failover.items():
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE'] = self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                  metadata['lfn'], targetSE,
                                                                  self.failoverSEs,
                                                                  fileMetaDict=metadata,
                                                                  fileCatalog=self.userFileCatalog)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
            cleanUp = True
            continue  # for users can continue even if one completely fails
        else:
            lfn = metadata['lfn']
            uploaded.append(lfn)

    # For files correctly uploaded must report LFNs to job parameters
    if uploaded:
        report = ', '.join(uploaded)
        self.jobReport.setJobParameter('UploadedOutputData', report)

    # Retrieve the potentially modified request object from the transfer client
    self.request = failoverTransfer.request

    # If some or all of the files failed to be saved to failover
    if cleanUp:
        self.workflow_commons['Request'] = self.request
        # Leave any uploaded files just in case it is useful for the user
        # do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')

    # If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
        result = rm.replicateAndRegister(lfn, repSE, catalog=self.userFileCatalog)
        if not result['OK']:
            # Best-effort: the file already has at least one Grid replica
            self.log.info('Replication failed with below error but file already exists in Grid storage with \
at least one replica:\n%s' % (result))

    self.workflow_commons['Request'] = self.request
    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')