def __getPrefix(self):
    """Work out the storage prefix and the staging destination of this task.

    Sets ``self.userprefix`` (built from the proxy user name when the proxy
    provides one), ``self.prefixDest`` and ``self.stagingDest``.

    The task category is read from the ``GPL_TASKCATEGORY`` environment
    variable and, when that is unset or empty, from the
    ``Pipeline/TaskCategory`` Operations option.

    :return: 0 on success, 1 on failure (the reason is logged)
    :raises KeyError: if ``PIPELINE_TASK`` or ``PIPELINE_STREAM`` is not set
    """
    op = Operations("glast.org")
    self.userprefix = None
    res = getProxyInfo()
    if not res["OK"]:
        self.log.error("Proxy could not be found")
        return 1
    if "username" in res["Value"]:
        user = res["Value"]["username"]
        self.userprefix = "/glast.org/user/%s/%s/" % (user[0], user)

    # .get(): a missing variable must reach the CS fallback below instead of
    # raising KeyError before the fallback is ever consulted.
    task_category = os.environ.get("GPL_TASKCATEGORY")
    if not task_category:
        task_category = op.getValue("Pipeline/TaskCategory", None)
    if not task_category:
        self.log.error("Could not find task category")
        return 1

    self.prefixDest = op.getValue("Pipeline/StorageElementBasePath", self.userprefix)
    if not self.prefixDest:
        # Neither the CS nor the proxy gave a base path; concatenating None
        # below would raise TypeError.
        self.log.error("Could not find a storage base path")
        return 1

    self.stagingDest = (
        self.prefixDest
        + "/"
        + task_category
        + "/"
        + os.environ["PIPELINE_TASK"]
        + "/"
        + os.environ["PIPELINE_STREAM"]
    )
    return 0
def getValidStatusTypes():
    '''
    Returns from the OperationsHelper: RSSConfiguration/GeneralConfig/Resources

    For every section found there, the ``StatusType`` option is read and
    converted with ``Utils.getTypedList``. Sections without that option fall
    back to the hard-coded DEFAULTS entry, or to ``{'StatusType': None}`` when
    there is no default. If the sections cannot be read at all, DEFAULTS is
    returned as is.

    :return: dict of the form { section : { 'StatusType' : value } }
    '''
    DEFAULTS = {
        'Site': {'StatusType': "''"},
        'Service': {'StatusType': "''"},
        'Resource': {'StatusType': "''"},
        'StorageElement': {'StatusType': ['Read', 'Write', 'Remove', 'Check']},
    }

    opHelper = Operations()

    sections = opHelper.getSections('RSSConfiguration/GeneralConfig/Resources')
    if not sections['OK']:
        return DEFAULTS

    result = {}
    for section in sections['Value']:
        res = opHelper.getValue('RSSConfiguration/GeneralConfig/Resources/%s/StatusType' % section)
        if res is not None:
            result[section] = {'StatusType': Utils.getTypedList(res)}
        elif section in DEFAULTS:
            # DEFAULTS entries already have the { 'StatusType' : ... } shape;
            # wrapping them again would nest the key twice.
            result[section] = dict(DEFAULTS[section])
        else:
            result[section] = {'StatusType': None}

    return result
def configure( self, csSection, submitPool ):
    """
    Common configuration step shared by all PilotDirectors.

    Loads the director options from csSection, reloads the configuration for
    submitPool, overrides the installation defaults with the values found in
    the Operations 'Pilot' section, logs the resulting settings and purges
    the expired entries of the list-match cache.
    """
    self.configureFromSection( csSection )
    self.reloadConfiguration( csSection, submitPool )

    # Defaults for the Setup where the Director is running
    ops = Operations()
    versions = ops.getValue( cfgPath( 'Pilot', 'Version' ), [ self.installVersion ] )
    self.installVersion = versions[0]
    self.installProject = ops.getValue( cfgPath( 'Pilot', 'Project' ), self.installProject )
    self.installation = ops.getValue( cfgPath( 'Pilot', 'Installation' ), self.installation )
    self.pilotExtensionsList = ops.getValue( "Pilot/Extensions", self.pilotExtensionsList )

    info = self.log.info
    info( '===============================================' )
    info( 'Configuration:' )
    info( '' )
    info( ' Target Grids: ', ', '.join( self.targetGrids ) )
    info( ' Install script: ', self.install )
    info( ' Pilot script: ', self.pilot )
    info( ' Install Ver: ', self.installVersion )
    # Optional settings are only reported when they are set
    if self.installProject:
        info( ' Project: ', self.installProject )
    if self.installation:
        info( ' Installation: ', self.installation )
    if self.extraPilotOptions:
        info( ' Extra Options: ', ' '.join( self.extraPilotOptions ) )
    info( ' ListMatch: ', self.enableListMatch )
    info( ' Private %: ', self.privatePilotFraction * 100 )
    if self.enableListMatch:
        info( ' ListMatch Delay:', self.listMatchDelay )

    self.listMatchCache.purgeExpired()
def submitProbeJobs(self, ce):
    """ Submit a software probe job to the given CE.

    The script to run is read from the Operations option
    ResourceStatus/SofwareManagementScript, defaulting to self.script.

    :param ce: name of the destination computing element
    :return: the failed submission result, or S_OK() when the job was submitted
    """
    # Credentials are expected to be in place already (set up by initialize)
    from DIRAC.Interfaces.API.Dirac import Dirac
    diracAPI = Dirac()
    from DIRAC.Interfaces.API.Job import Job
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    import DIRAC

    scriptname = Operations().getValue("ResourceStatus/SofwareManagementScript", self.script)

    probeJob = Job()
    probeJob.setDestinationCE(ce)
    probeJob.setCPUTime(1000)
    probeJob.setName("Probe %s" % ce)
    probeJob.setJobGroup("SoftwareProbe")
    executable = "%s/GlastDIRAC/ResourceStatusSystem/Client/%s" % (DIRAC.rootPath, scriptname)
    probeJob.setExecutable(executable, logFile='SoftwareProbe.log')
    probeJob.setOutputSandbox('*.log')

    submission = diracAPI.submit(probeJob)
    if submission['OK']:
        return S_OK()
    return submission
def __getBackendsFromCFG(self, cfgPath):
    """
    Look up the list of desired logging backends for a component.

    The sources are tried in order and the first non-empty answer wins:

    1. the ``LogBackends`` option of the component section (``cfgPath``)
    2. the Operations option ``Logging/Default<ComponentType>Backends``
    3. the Operations option ``Logging/DefaultBackends``
    4. the hard-coded default ``['stdout']``

    :params cfgPath: string of the configuration path
    :return: list of backend names
    """
    # Imported here to avoid a dependency loop
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    from DIRAC import gConfig

    # The second-to-last path element is the component type,
    # for example 'Agents' or 'Services'
    componentType = cfgPath.split("/")[-2]
    opsHelper = Operations()

    # "or" short-circuits, so a later source is only queried when every
    # earlier one returned nothing
    return (
        gConfig.getValue("%s/%s" % (cfgPath, 'LogBackends'), [])
        or opsHelper.getValue("Logging/Default%sBackends" % componentType, [])
        or opsHelper.getValue("Logging/DefaultBackends", [])
        or ['stdout']
    )
def initializeLesHouchesFileManagerHandler(serviceInfo):
    """ Initialize the service

    Reads the model -> file name mapping from the Operations ``/Models``
    section and loads the content of every referenced file, found under the
    ``BasePath`` option, into the module-level ``ModelsDict``.

    :param dict serviceInfo: service information, ``serviceSectionPath`` is used
    :return: S_OK() on success; S_ERROR if the options cannot be read, the
             base path is not defined, or one of the files is missing
    """
    global ModelsDict

    ops = Operations()
    res = ops.getOptionsDict("/Models")
    if not res["OK"]:
        return res
    templates = res["Value"]

    cfgPath = serviceInfo["serviceSectionPath"]
    location = ops.getValue("%s/BasePath" % cfgPath, "")
    if not location:
        gLogger.error("Path to LesHouches files not defined")
        return S_ERROR("Path to LesHouches files not defined in CS")

    for template, tfile in templates.items():
        ModelsDict[template] = {"file": tfile}
        if not tfile:
            # A model without a file gets an empty content
            ModelsDict[template]["content"] = [""]
            continue
        # os.path.join takes the components as separate arguments, not a list
        file_path = os.path.join(location, tfile)
        if not os.path.exists(file_path):
            gLogger.error("Missing %s" % file_path)
            return S_ERROR("File missing")
        # "with" closes the file even if reading fails
        with open(file_path, "r") as lesHouchesFile:
            ModelsDict[template]["content"] = lesHouchesFile.readlines()

    return S_OK()
def resolveDeps(sysconfig, appli, appversion):
    """ Collect the dependencies of an application, recursively.

    :param str sysconfig: system configuration
    :param str appli: application name
    :param str appversion: application version
    :return: list of dictionaries with the keys ``app`` and ``version``;
             each dependency is followed by its own dependencies
    """
    log = gLogger.getSubLogger("resolveDeps")
    ops = Operations()
    deps = ops.getSections('/AvailableTarBalls/%s/%s/%s/Dependencies' % (sysconfig, appli, appversion), '')
    if not deps['OK']:
        log.verbose("Could not find any dependency for %s %s, ignoring" % (appli, appversion))
        return []

    resolved = []
    for depName in deps['Value']:
        depVersion = ops.getValue('/AvailableTarBalls/%s/%s/%s/Dependencies/%s/version' % (sysconfig,
                                                                                           appli,
                                                                                           appversion,
                                                                                           depName), '')
        if not depVersion:
            log.error("Retrieving dependency version for %s failed, skipping to next !" % (depName))
            continue
        log.verbose("Found dependency %s %s" % (depName, depVersion))
        resolved.append({"app": depName, "version": depVersion})
        # Descend into the dependencies of this dependency
        resolved.extend(resolveDeps(sysconfig, depName, depVersion))
    return resolved
def getSoftwareFolder(platform, appname, appversion):
    """ Find where an application is installed.

    CVMFS is tried first. Otherwise the folder name is derived from the
    tar ball name registered in the CS and searched for in the local area,
    then in the shared area.

    :param str platform: platform
    :param str appname: name of the application
    :param str appversion: version of the application
    :return: S_OK(path of the software folder) or S_ERROR
    """
    cvmfs = checkCVMFS(platform, [appname, appversion])
    if cvmfs["OK"]:
        return S_OK(cvmfs['Value'][0])

    tarBall = Operations().getValue('/AvailableTarBalls/%s/%s/%s/TarBall'%(platform, appname, appversion), '')
    if not tarBall:
        return S_ERROR("Could not find %s, %s name from CS" % (appname, appversion) )

    # The folder is named like the tar ball without its archive extension
    folder = tarBall
    if "gz" in tarBall:
        folder = tarBall.replace(".tgz","").replace(".tar.gz", "")

    localArea = getLocalAreaLocation()
    sharedArea = getSharedAreaLocation()
    # The local area takes precedence over the shared one
    for softwareRoot in (localArea, sharedArea):
        candidate = os.path.join(softwareRoot, folder)
        if os.path.exists(candidate):
            return S_OK(candidate)
    return S_ERROR('Missing installation of %s!' % folder)
def __init__( self, **kwargs ):
    """ Initialise the base Client, read the maximum reset counter from the
        Operations option Productions/ProductionFilesMaxResetCounter
        (default 10) and point the client to the TransformationManager service.
    """
    Client.__init__( self, **kwargs )
    self.maxResetCounter = Operations().getValue( 'Productions/ProductionFilesMaxResetCounter', 10 )
    self.setServer( 'Transformation/TransformationManager' )
def configure(self, csSection, submitPool):
    """
    Common configuration step shared by all PilotDirectors.

    Reads the director options from ``csSection``, reloads the configuration
    for ``submitPool``, applies the Operations ``Pilot`` overrides for the
    installation settings, reports the resulting configuration in the log and
    purges the expired entries of the list-match cache.
    """
    self.configureFromSection(csSection)
    self.reloadConfiguration(csSection, submitPool)

    # Defaults for the Setup where the Director is running
    pilotOps = Operations()
    versionList = pilotOps.getValue(cfgPath("Pilot", "Version"), [self.installVersion])
    self.installVersion = versionList[0]
    self.installProject = pilotOps.getValue(cfgPath("Pilot", "Project"), self.installProject)
    self.installation = pilotOps.getValue(cfgPath("Pilot", "Installation"), self.installation)
    self.pilotExtensionsList = pilotOps.getValue("Pilot/Extensions", self.pilotExtensionsList)

    log = self.log
    log.info("===============================================")
    log.info("Configuration:")
    log.info("")
    log.info(" Target Grids: ", ", ".join(self.targetGrids))
    log.info(" Install script: ", self.install)
    log.info(" Pilot script: ", self.pilot)
    log.info(" Pilot modules", self.extraModules)
    log.info(" Install Ver: ", self.installVersion)
    # Optional settings are reported only when set
    if self.installProject:
        log.info(" Project: ", self.installProject)
    if self.installation:
        log.info(" Installation: ", self.installation)
    if self.extraPilotOptions:
        log.info(" Extra Options: ", " ".join(self.extraPilotOptions))
    log.info(" ListMatch: ", self.enableListMatch)
    log.info(" Private %: ", self.privatePilotFraction * 100)
    if self.enableListMatch:
        log.info(" ListMatch Delay:", self.listMatchDelay)

    self.listMatchCache.purgeExpired()
def web_getLaunchpadOpts(self):
    """ Send the launchpad form defaults and the predefined parameter sets.

    Each entry of the defaults is a two-element list ``[flag, value]``. The
    values are overridden by the options read from the CS (the first element
    of each CS option is used), and options unknown to the defaults are added
    with flag 0. The predefined sets are the option dictionaries of the
    sub-sections of the Operations ``Launchpad`` section.

    The answer is written as
    ``{"success": "true", "result": <defaults>, "predefinedSets": <sets>}``.
    """
    defaultParams = {"JobName": [1, 'DIRAC'],
                     "Executable": [1, "/bin/ls"],
                     "Arguments": [1, "-ltrA"],
                     "OutputSandbox": [1, "std.out, std.err"],
                     "InputData": [0, ""],
                     "OutputData": [0, ""],
                     "OutputSE": [0, "DIRAC-USER"],
                     "OutputPath": [0, ""],
                     "CPUTime": [0, "86400"],
                     "Site": [0, ""],
                     "BannedSite": [0, ""],
                     "Platform": [0, "Linux_x86_64_glibc-2.5"],
                     "Priority": [0, "5"],
                     "StdError": [0, "std.err"],
                     "StdOutput": [0, "std.out"],
                     "Parameters": [0, "0"],
                     "ParameterStart": [0, "0"],
                     "ParameterStep": [0, "1"]}

    delimiter = gConfig.getValue("/Website/Launchpad/ListSeparator", ',')
    options = self.__getOptionsFromCS(delimiter=delimiter)

    gLogger.debug("Combined options from CS: %s" % options)
    gLogger.info("end __getLaunchpadOpts")

    # Update the default values with the options found in the CS.
    # NOTE(review): the "or {}" guard assumes __getOptionsFromCS may return an
    # empty/false value when nothing is configured - confirm against it.
    for key in options or {}:
        if key not in defaultParams:
            defaultParams[key] = [0, ""]
        defaultParams[key][1] = options[key][0]

    # Read the predefined sets of launchpad parameter values
    obj = Operations()
    predefinedSets = {}

    launchpadSections = obj.getSections("Launchpad")
    if launchpadSections['OK']:
        for section in launchpadSections["Value"]:
            predefinedSets[section] = {}
            sectionOptions = obj.getOptionsDict("Launchpad/" + section)
            # Goes to the debug log rather than to the server's stdout
            gLogger.debug("Launchpad/%s options: %s" % (section, sectionOptions))
            if sectionOptions['OK']:
                predefinedSets[section] = sectionOptions["Value"]

    self.write({"success": "true", "result": defaultParams, "predefinedSets": predefinedSets})
class RssConfiguration:
    '''
    Reader for the <pathToRSSConfiguration>/Config section, accessed through
    the Operations helper. The options read by this class are:

      Config/Cache        : returned by getConfigCache
      Config/FromAddress  : returned by getConfigFromAddress
      Config/StatusTypes  : one option per element type plus an optional
                            'default' one, each holding a list of status types
    '''

    def __init__( self ):
        self.opsHelper = Operations()

    def getConfigCache( self, default = 300 ):
        '''
        Returns the value of <pathToRSSConfiguration>/Config/Cache, or
        default when it is not defined.
        '''
        cachePath = '%s/Config/Cache' % _rssConfigPath
        return self.opsHelper.getValue( cachePath, default )

    def getConfigFromAddress( self, default = None ):
        '''
        Returns the value of <pathToRSSConfiguration>/Config/FromAddress, or
        default when it is not defined.
        '''
        addressPath = '%s/Config/FromAddress' % _rssConfigPath
        return self.opsHelper.getValue( addressPath, default )

    def getConfigStatusType( self, elementType = None ):
        '''
        Returns the status types of elementType as read from
        <pathToRSSConfiguration>/Config/StatusTypes. If elementType has no
        entry there, the 'default' entry is used; if that is missing too, or
        the section cannot be read, the hard-coded ( 'all', ) is returned.
        '''
        _DEFAULTS = ( 'all', )

        res = self.opsHelper.getOptionsDict( '%s/Config/StatusTypes' % _rssConfigPath )
        if not res[ 'OK' ]:
            return _DEFAULTS

        statusTypes = res[ 'Value' ]
        # The element-specific entry wins over the generic 'default' one
        for key in ( elementType, 'default' ):
            if key in statusTypes:
                return List.fromChar( statusTypes[ key ] )

        return _DEFAULTS
def getCountryMappingTier1( country ):
    """ Returns the Tier1 site assigned to the country that the given country
        code maps to, or S_ERROR if the mapping fails or no Tier1 is assigned.
    """
    ops = Operations()
    mapping = getCountryMapping( country )
    if not mapping['OK']:
        return mapping
    assignedCountry = mapping['Value']
    tier1Site = ops.getValue( '/Countries/%s/Tier1' % assignedCountry, '' )
    if tier1Site:
        return S_OK( tier1Site )
    return S_ERROR( "No Tier1 assigned to %s" % assignedCountry )
def initialize(self):
    """ Record the DN and group of the remote caller and build the job policy.

    The GlobalJobsInfo flag is read from the Operations of the caller's group
    (option /Services/JobMonitoring/GlobalJobsInfo, default True).

    :return: S_OK()
    """
    credentials = self.getRemoteCredentials()
    self.ownerDN = credentials['DN']
    self.ownerGroup = credentials['group']

    groupOps = Operations(group=self.ownerGroup)
    self.globalJobsInfo = groupOps.getValue('/Services/JobMonitoring/GlobalJobsInfo', True)

    self.jobPolicy = policy = JobPolicy(self.ownerDN, self.ownerGroup, self.globalJobsInfo)
    policy.setJobDB(gJobDB)
    return S_OK()
def getSteeringFileDirName(platform, application, applicationVersion):
    """ Locate the steering file directory attached to an application.

    The steering files version is read from the ``steeringfiles`` dependency
    of the application in the CS and handed to getSteeringFileDir.

    :return: result of getSteeringFileDir, or S_ERROR if no version is defined
    """
    ops = Operations()
    optionPath = '/AvailableTarBalls/%s/%s/%s/Dependencies/steeringfiles/version' % (platform,
                                                                                    application,
                                                                                    applicationVersion)
    steeringVersion = ops.getValue(optionPath, '')
    if steeringVersion:
        return getSteeringFileDir(platform, steeringVersion)
    return S_ERROR("Could not find attached SteeringFile version")
def constructUserLFNs(jobID, vo, owner, outputFiles, outputPath):
    """ This method is used to supplant the standard job wrapper output data policy
        for ILC. The initial convention adopted for user output files is the following:

        If outputPath is not defined:
          /<vo>/<prefix>/<initial e.g. s>/<owner e.g. sposs>/<yearMonth e.g. 2010_02>/<subdir>/<jobID>/<fileName>
        Otherwise:
          /<vo>/<prefix>/<initial e.g. s>/<owner e.g. sposs>/<outputPath>/<fileName>

        where <prefix> is the Operations option LFNUserPrefix (default "user")
        and <subdir> is jobID divided by 1000.

        :param int jobID: job identifier
        :param str vo: virtual organisation; taken from the proxy group when
                       empty, falling back to 'ilc'
        :param str owner: user name of the job owner
        :param outputFiles: a file name or a list/tuple of file names, with or
                            without the 'LFN:' prefix
        :param str outputPath: optional path below the user directory
        :return: S_OK(list of LFNs)
    """
    initial = owner[:1]
    # Floor division: plain "/" would give a float under Python 3
    subdir = str(jobID // 1000)
    today = datetime.date.today()
    yearMonth = '%s_%02d' % (today.year, today.month)

    if not vo:
        res = getVOfromProxyGroup()
        if not res['OK']:
            gLogger.error('Could not get VO from CS, assuming ilc')
            vo = 'ilc'
        else:
            vo = res['Value']
    ops = Operations(vo = vo)
    lfn_prefix = ops.getValue("LFNUserPrefix", "user")

    # Strip out any leading or trailing slashes but allow fine structure
    if outputPath:
        outputPath = os.sep.join(part for part in outputPath.split(os.sep) if part)

    # isinstance also accepts list subclasses and tuples, which the previous
    # exact type comparison wrapped into a one-element list
    if not isinstance(outputFiles, (list, tuple)):
        outputFiles = [outputFiles]

    outputLFNs = {}
    for outputFile in outputFiles:
        # Strip out any fine structure in the output file specified by the user,
        # restrict to output file names; the output path field can be used to
        # describe this
        outputFile = outputFile.replace('LFN:', '')
        baseName = os.path.basename(outputFile)
        if outputPath:
            lfn = os.sep + os.path.join(vo, lfn_prefix, initial, owner, outputPath + os.sep + baseName)
        else:
            lfn = os.sep + os.path.join(vo, lfn_prefix, initial, owner,
                                        yearMonth, subdir, str(jobID)) + os.sep + baseName
        outputLFNs[outputFile] = lfn

    # list(): dict.values() is a view under Python 3
    outputData = list(outputLFNs.values())
    if outputData:
        gLogger.info('Created the following output data LFN(s):\n%s' % ('\n'.join(outputData)))
    else:
        gLogger.info('No output LFN(s) constructed')
    return S_OK(outputData)
def _getCatalogs( self ):
    """ Build the lists of read and write catalogs from the configuration.

    The eligible catalogs are the sections of /Services/FileCatalogs in the
    Operations of the proxy VO; if that cannot be read, the sections of
    self.rootConfigPath are used instead (backward compatibility). For every
    catalog with Status 'Active' a catalog object is generated and registered
    as ( name, object, master ) in self.readCatalogs and/or
    self.writeCatalogs according to its AccessType; master catalogs are put
    at the front of the lists.

    :return: S_OK(), or the first failed result encountered
    """
    voResult = getVOfromProxyGroup()
    if not voResult['OK']:
        return voResult
    opHelper = Operations( vo = voResult['Value'] )

    sections = opHelper.getSections( '/Services/FileCatalogs' )
    fromOperations = bool( sections['OK'] )
    if fromOperations:
        fileCatalogs = sections['Value']
    else:
        res = gConfig.getSections( self.rootConfigPath, listOrdered = True )
        if not res['OK']:
            errStr = "FileCatalog._getCatalogs: Failed to get file catalog configuration."
            gLogger.error( errStr, res['Message'] )
            return S_ERROR( errStr )
        fileCatalogs = res['Value']

    for catalogName in fileCatalogs:
        res = self._getCatalogConfigDetails( catalogName )
        if not res['OK']:
            return res
        catalogConfig = res['Value']

        if fromOperations:
            # Options defined in the Operations section override the others
            res = opHelper.getOptionsDict( '/Services/FileCatalogs/%s' % catalogName )
            if not res['OK']:
                return res
            catalogConfig.update( res['Value'] )

        if catalogConfig['Status'] != 'Active':
            continue

        res = self._generateCatalogObject( catalogName )
        if not res['OK']:
            return res
        master = catalogConfig['Master']
        entry = ( catalogName, res['Value'], master )

        accessType = catalogConfig['AccessType']
        for mode, catalogList in ( ( 'Read', self.readCatalogs ), ( 'Write', self.writeCatalogs ) ):
            if not re.search( mode, accessType ):
                continue
            if master:
                catalogList.insert( 0, entry )
            else:
                catalogList.append( entry )

    return S_OK()
def __init__(self, argumentsDict):
    """ Standard constructor

    Stores the arguments and completes them from the Operations
    InputDataPolicy section: 'AllReplicas' is always set from the CS, while
    'Protocol' and 'RemoteProtocol' are only filled in when the caller did
    not provide them. 'InputDataDirectory' defaults to 'CWD'.
    """
    self.arguments = argumentsDict
    self.name = COMPONENT_NAME
    self.log = gLogger.getSubLogger(self.name)

    opsHelper = Operations()
    allReplicas = opsHelper.getValue('InputDataPolicy/AllReplicas', False)
    configuration = self.arguments.setdefault('Configuration', {})
    configuration['AllReplicas'] = allReplicas

    localProtocols = opsHelper.getValue('InputDataPolicy/Protocols/Local', [])
    self.arguments['Configuration'].setdefault('Protocol', localProtocols)
    remoteProtocols = opsHelper.getValue('InputDataPolicy/Protocols/Remote', [])
    self.arguments['Configuration'].setdefault('RemoteProtocol', remoteProtocols)

    # By default the input data go into the current directory
    self.arguments.setdefault('InputDataDirectory', 'CWD')
def getSiteSEMapping( gridName = '' ):
    """ Returns a dictionary of all sites and their localSEs as a list, e.g.
        {'LCG.CERN.ch':['CERN-RAW','CERN-RDST',...]}
        If gridName is specified, result is restricted to that Grid type.

        The SEs come from the SE option of each site in /Resources/Sites and
        are completed with the Operations SiteLocalSEMapping section.
    """
    siteSEMapping = {}

    gridSections = gConfig.getSections( 'Resources/Sites/' )
    if not gridSections['OK']:
        gLogger.warn( 'Problem retrieving sections in /Resources/Sites' )
        return gridSections
    grids = gridSections['Value']

    if gridName:
        if gridName not in grids:
            return S_ERROR( 'Could not get sections for /Resources/Sites/%s' % gridName )
        grids = [gridName]
    gLogger.debug( 'Grid Types are: %s' % ( ', '.join( grids ) ) )

    for grid in grids:
        sites = gConfig.getSections( '/Resources/Sites/%s' % grid )
        if not sites['OK']:
            gLogger.warn( 'Problem retrieving /Resources/Sites/%s section' % grid )
            return sites
        for candidate in sites['Value']:
            candidateSEs = gConfig.getValue( '/Resources/Sites/%s/%s/SE' % ( grid, candidate ), [] )
            if not candidateSEs:
                gLogger.debug( 'No SEs defined for site %s' % candidate )
                continue
            siteSEMapping[candidate] = candidateSEs

    # Add Sites from the SiteLocalSEMapping in the CS
    cfgLocalSEPath = cfgPath( 'SiteLocalSEMapping' )
    opsHelper = Operations()
    localMapping = opsHelper.getOptionsDict( cfgLocalSEPath )
    if not localMapping['OK']:
        return S_OK( siteSEMapping )

    for site in localMapping['Value']:
        ses = opsHelper.getValue( cfgPath( cfgLocalSEPath, site ), [] )
        if not ses:
            continue
        # The grid type is the first dot-separated field of the site name
        if gridName and gridName != site.split( '.' )[0]:
            continue
        knownSEs = siteSEMapping.setdefault( site, [] )
        for se in ses:
            if se not in knownSEs:
                knownSEs.append( se )

    return S_OK( siteSEMapping )
def getCountryMapping( country ):
    """ Determines the associated country from the country code.

        The /Countries/<country>/AssignedTo options are followed until a
        country is reached that is not assigned to another one. S_ERROR is
        returned if the chain comes back to a country already visited.
    """
    visited = [country]
    ops = Operations()
    current = country
    while True:
        # A country without AssignedTo maps to itself, which ends the chain
        target = ops.getValue( '/Countries/%s/AssignedTo' % current, current )
        if target == current:
            return S_OK( target )
        if target in visited:
            return S_ERROR( 'Circular mapping detected for %s' % current )
        visited.append( target )
        current = target
def getPluginParam(self, name, default=None):
    """ Get a plugin parameter, looking in increasing order of priority at:

    1. the given default
    2. the generic CS option TransformationPlugins/<name>
    3. the plugin-specific CS option TransformationPlugins/<plugin>/<name>
    4. the transformation parameters (self.params)

    Caution: the type returned is that of the default value. When a default
    is given and the value found has a different type, it is converted to
    list, int, float or bool accordingly; other types are passed through
    with a warning (str is passed through silently).
    """
    valueType = None if default is None else type(default)

    # Generic value first...
    optionPath = "TransformationPlugins/%s" % (name)
    value = Operations().getValue(optionPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))

    # ...then the plugin-specific one, which falls back on the generic value
    optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    value = Operations().getValue(optionPath, value)
    self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
    if value is not None:
        default = value

    # Finally the transformation-specific parameter
    value = self.params.get(name, default)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" % (name, value, str(valueType)))

    if valueType is not None and not isinstance(value, valueType):
        if valueType is list:
            try:
                if value and value != 'None':
                    value = ast.literal_eval(value)
                else:
                    value = []
            except (ValueError, SyntaxError):
                # Not a Python literal, e.g. 'SE-DST' (ValueError) or
                # 'SE_MC-DST' (SyntaxError): treat as comma-separated list
                value = [val for val in value.replace(' ', '').split(',') if val]
        elif valueType is int:
            value = int(value)
        elif valueType is float:
            value = float(value)
        elif valueType is bool:
            value = False if value in ('False', 'No', 'None', None, 0) else bool(value)
        elif valueType is not str:
            self.logWarn(
                "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name))

    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value
def getUserRootDir(self):
    '''Return the user's initial root directory:
       /<vo>/<LFNUserPrefix>/<first letter of the user name>/<user name>

       The VO defaults to 'bes' and the prefix to 'user'.
    '''
    username = self.getUserName()
    initial = username[:1]
    vo = getVO() or 'bes'
    user_prefix = Operations(vo = vo).getValue('LFNUserPrefix', 'user')
    return '/' + '/'.join([vo, user_prefix, initial, username])
def getShifterProxy( shifterType, fileName = False ):
    """
    Download the proxy of a shifter.

    The user and group of the shifter are read from the Operations section
    Shifter/<shifterType>; the group defaults to the default group of the
    user's DN. A VOMS proxy is requested when the group has a VOMS attribute,
    a plain proxy otherwise.

    :param shifterType: ProductionManager / DataManager...
    :param fileName: file to store the proxy in; its directory is created
                     if needed
    :return: S_OK( dict with DN, username, group, chain and proxyFile )
             or the first failed result
    """
    if fileName:
        try:
            os.makedirs( os.path.dirname( fileName ) )
        except OSError:
            # Best effort: the directory usually exists already
            pass

    ops = Operations()
    userName = ops.getValue( cfgPath( 'Shifter', shifterType, 'User' ), '' )
    if not userName:
        return S_ERROR( "No shifter User defined for %s" % shifterType )

    result = CS.getDNForUsername( userName )
    if not result[ 'OK' ]:
        return result
    userDN = result[ 'Value' ][0]

    result = CS.findDefaultGroupForDN( userDN )
    if not result['OK']:
        return result
    userGroup = ops.getValue( cfgPath( 'Shifter', shifterType, 'Group' ), result['Value'] )

    vomsAttr = CS.getVOMSAttributeForGroup( userGroup )
    if vomsAttr:
        gLogger.info( "Getting VOMS [%s] proxy for shifter %s@%s (%s)" % ( vomsAttr, userName, userGroup, userDN ) )
        download = gProxyManager.downloadVOMSProxyToFile
    else:
        gLogger.info( "Getting proxy for shifter %s@%s (%s)" % ( userName, userGroup, userDN ) )
        download = gProxyManager.downloadProxyToFile
    result = download( userDN, userGroup, filePath = fileName,
                       requiredTimeLeft = 86400, cacheTime = 86400 )
    if not result[ 'OK' ]:
        return result

    return S_OK( { 'DN' : userDN,
                   'username' : userName,
                   'group' : userGroup,
                   'chain' : result[ 'chain' ],
                   'proxyFile' : result[ 'Value' ] } )
def __init__( self, catalogs = None, vo = None ):
    """ Default constructor

    :param catalogs: a catalog name, a list of catalog names, or None to use
                     the catalogs defined in the configuration
    :param vo: VO name; taken from the proxy group when not given

    self.valid is set to False when the catalogs cannot be obtained or when
    no read and no write catalog ends up being defined.
    """
    self.valid = True
    self.timeout = 180
    self.readCatalogs = []
    self.writeCatalogs = []
    self.metaCatalogs = []
    self.rootConfigPath = '/Resources/FileCatalogs'

    if vo:
        self.vo = vo
    else:
        self.vo = getVOfromProxyGroup().get( 'Value', None )
    self.opHelper = Operations( vo = self.vo )

    # Normalise the argument to a list of catalog names
    if catalogs is None:
        catalogList = []
    elif type( catalogs ) in types.StringTypes:
        catalogList = [catalogs]
    else:
        catalogList = catalogs

    res = self._getSelectedCatalogs( catalogList ) if catalogList else self._getCatalogs()

    if not res['OK'] or not ( self.readCatalogs or self.writeCatalogs ):
        self.valid = False
def initialize(self):
    """Set up the agent.

    Uses the ProductionManager shifter proxy and requires the agent options
    BasePath and BaseLogPath as well as the Operations options
    Transformations/ArchiveSE and Transformations/BaseLogLFN.

    :return: S_OK(), or S_ERROR naming the first missing mandatory option
    """
    self.am_setModuleParam("shifterProxy", "ProductionManager")

    self.basepath = self.am_getOption("BasePath", "")
    if not self.basepath:
        return S_ERROR("Missing mandatory option BasePath")

    self.baselogpath = self.am_getOption("BaseLogPath", "")
    if not self.baselogpath:
        return S_ERROR("Missing mandatory option BaseLogPath")

    self.ops = Operations()

    archiveSE = self.ops.getValue("Transformations/ArchiveSE", "")
    if not archiveSE:
        return S_ERROR("Missing mandatory option ArchiveSE")
    self.storageElement = StorageElement( archiveSE )

    baseLogLFN = self.ops.getValue("Transformations/BaseLogLFN", "")
    if not baseLogLFN:
        return S_ERROR("Missing mandatory option Transformations/BaseLogLFN")
    self.baselfn = baseLogLFN

    self.transclient = TransformationClient()
    self.log.info("Running ")
    return S_OK()
def __init__( self, pilotAgentsDB = None, jobDB = None, tqDB = None, jlDB = None, opsHelper = None ):
    """ c'tor

    Every collaborator can be injected; any that is not given (or is false)
    is replaced by a freshly created default instance.
    """
    self.pilotAgentsDB = pilotAgentsDB if pilotAgentsDB else PilotAgentsDB()
    self.jobDB = jobDB if jobDB else JobDB()
    self.tqDB = tqDB if tqDB else TaskQueueDB()
    self.jlDB = jlDB if jlDB else JobLoggingDB()
    self.opsHelper = opsHelper if opsHelper else Operations()

    self.log = gLogger.getSubLogger( "Matcher" )

    self.limiter = Limiter( jobDB = self.jobDB, opsHelper = self.opsHelper )
def __init__(self, script = None):
    """Initialise the production job attributes with their default values.

    :param script: passed on to the parent constructor; when it is not given
                   (or false), ``self.__setDefaults()`` is called as well
    """
    super(ProductionJob, self).__init__( script )
    # Bookkeeping flags and identifiers
    self.prodVersion = __RCSID__
    self.dryrun = False
    self.created = False
    self.checked = False
    self.call_finalization = False
    self.finalsdict = {}
    self.transfid = 0
    self.type = 'Production'
    self.csSection = '/Production/Defaults'
    # Helper and client objects
    self.ops = Operations()
    self.fc = FileCatalogClient()
    self.trc = TransformationClient()
    self.defaultProdID = '12345'
    self.defaultProdJobID = '12345'
    self.jobFileGroupSize = 1
    self.nbtasks = 1
    self.slicesize =0
    self.basename = ''
    # Base path read from the CS, with a hard-coded fallback
    self.basepath = self.ops.getValue('/Production/CLIC/BasePath','/ilc/prod/clic/')
    self.evttype = ''
    self.datatype = ''
    self.energycat = ''
    self.detector = ''
    self.currtrans = None
    self.description = ''
    # Output paths and metadata containers
    self.finalpaths = []
    self.finalMetaDict = defaultdict( dict )
    self.prodMetaDict = {}
    self.finalMetaDictNonSearch = {}
    self.metadict_external = {}
    self.outputStorage = ''
    self.proxyinfo = getProxyInfo()
    self.inputdataquery = False
    self.inputBKSelection = {}
    self.plugin = 'Standard'
    self.prodGroup = ''
    # Production type names kept on the instance
    self.prodTypes = ['MCGeneration', 'MCSimulation', 'Test', 'MCReconstruction',
                      'MCReconstruction_Overlay', 'Merge', 'Split',
                      'MCGeneration_ILD',
                      'MCSimulation_ILD',
                      'MCReconstruction_ILD',
                      'MCReconstruction_Overlay_ILD',
                      'Split_ILD'
                     ]
    self.prodparameters = {}
    self.prodparameters['NbInputFiles'] = 1
    self.prodparameters['nbevts'] = 0
    #self.prodparameters["SWPackages"] = ''
    # Add the IS_PROD parameter to the workflow
    self._addParameter(self.workflow, "IS_PROD", 'JDL', True, "This job is a production job")
    if not script:
        self.__setDefaults()

    # NOTE: these two are assigned after __setDefaults() has run
    self._recBasePaths = {}
    self.maxFCFoldersToCheck = 100000
def __init__(self, argumentsDict):
    """ Standard constructor: keep the arguments dictionary and set up the
        component name, its sub-logger and the Operations helper.
    """
    self.arguments = argumentsDict
    self.name = COMPONENT_NAME
    componentLog = gLogger.getSubLogger(self.name)
    self.log = componentLog
    self.ops = Operations()
def __init__(self):
    ''' Constructor: sets up the "WrapperCall" sub-logger, the Operations
        helper for "glast.org" and an empty stdError buffer.
    '''
    wrapperLog = gLogger.getSubLogger("WrapperCall")
    self.log = wrapperLog
    glastOps = Operations("glast.org")
    self.ops = glastOps
    self.stdError = ''
def __init__( self, catalogs = None, vo = None ):
    """ Default constructor

    :param catalogs: a catalog name, a list of catalog names, or None/empty
                     to use the catalogs defined in the configuration
    :param vo: VO name; taken from the proxy group when not given

    self.valid is set to False when the VO cannot be determined, when the
    catalogs cannot be obtained, or when no read and no write catalog ends
    up being defined. In the first case the helpers are left as None.
    """
    self.valid = True
    self.timeout = 180
    self.readCatalogs = []
    self.writeCatalogs = []
    # Defined up front so that the attributes exist on an invalid object
    self.opHelper = None
    self.reHelper = None

    self.vo = vo
    if not vo:
        result = getVOfromProxyGroup()
        if not result['OK']:
            # __init__ must return None: returning the error structure would
            # raise TypeError, so the failure is reported through self.valid
            self.valid = False
            return
        self.vo = result['Value']

    self.opHelper = Operations( vo = self.vo )
    self.reHelper = Resources( vo = self.vo )

    # None replaces the former mutable default argument
    if catalogs is None:
        catalogs = []
    elif type( catalogs ) in types.StringTypes:
        catalogs = [catalogs]

    if catalogs:
        res = self._getSelectedCatalogs( catalogs )
    else:
        res = self._getCatalogs()

    if not res['OK']:
        self.valid = False
    elif ( len( self.readCatalogs ) == 0 ) and ( len( self.writeCatalogs ) == 0 ):
        self.valid = False
def configHelper(voList):
    """
    Gather the Rucio client options from the CS for a set of VOs.

    Active catalogs of type RucioFileCatalog are first located in
    /Resources/FileCatalogs. Then, for each VO, the one among them that is
    listed in the Operations /Services/Catalogs section is selected and its
    RucioHost, AuthHost and PrivilegedAccount options are collected. VOs
    with no such catalog, with more than one, or whose CatalogList does not
    contain it are skipped.

    :param volist: list of VO names, or a VO name (str)
    :return: a dictionary of a form {vo: params, vo: params,}, or S_ERROR if
             the file catalogs section cannot be read
    :rtype: dict
    """
    log = gLogger.getLocalSubLogger("RucioSynchronizerHelper")

    if isinstance(voList, str):
        voList = [voList]
    clientConfig = {}
    log.debug("VO list to consider for synchronization: ", voList)

    # Locate the RucioFileCatalog type catalogs in Resources first
    result = gConfig.getSections("/Resources/FileCatalogs")
    if not result["OK"]:
        log.error("No catalogs defined in Resources.")
        return S_ERROR("No catalogs defined in Resources.")

    catalogs = result["Value"]
    log.debug("File catalogs defined in Resources", catalogs)
    catNames = []
    for catalog in catalogs:
        result = gConfig.getOptionsDict(getCatalogPath(catalog))
        if not result["OK"]:
            continue
        options = result["Value"]
        log.debug("Rucio Catalog candidate options", options)
        isActive = options.get("Status", None) == "Active"
        if isActive and options.get("CatalogType", None) == "RucioFileCatalog":
            catNames.append(catalog)

    log.info("Active FileCatalogs candidates of type RucioFileCatalog found in Resources:", catNames)

    # There may be several candidates; Operations tells which one is used by
    # which VO. There can be only one Rucio catalog per VO.
    for vo in voList:
        opHelper = Operations(vo=vo)
        result = opHelper.getSections("/Services/Catalogs")
        if not result["OK"]:
            log.warn("No Services/Catalogs section in Operations, for ", "VO=%s (skipped)" % vo)
            continue

        catSections = set(result["Value"])
        selectedCatalog = list(catSections.intersection(catNames))

        if len(selectedCatalog) > 1:
            log.error(
                "VO %s: Services/Catalogs section mis-configured."
                " More that one Rucio file catalog",
                "[VO: %s, Catalogs: %s]" % (vo, selectedCatalog),
            )
            continue

        if not selectedCatalog:
            log.warn("VO is not using RucioFileCatalog (VO skipped)", "[VO: %s]" % vo)
            continue

        rucioCatalog = selectedCatalog[0]

        # If a catalog list is defined, it has to contain the selected catalog
        fileCatalogs = opHelper.getValue("/Services/Catalogs/CatalogList", [])
        if fileCatalogs and rucioCatalog not in fileCatalogs:
            log.warn("VO is not using RucioFileCatalog - it is not in the catalog list", "[VO: %s]" % vo)
            continue

        # Now collect the Rucio specific parameters for the VO
        result = gConfig.getOptionsDict(getCatalogPath(rucioCatalog))
        if not result["OK"]:
            log.error(result["Message"])
            continue
        optDict = result["Value"]
        clientConfig[vo] = {
            "rucioHost": optDict.get("RucioHost", None),
            "authHost": optDict.get("AuthHost", None),
            "privilegedAccount": optDict.get("PrivilegedAccount", "root"),
        }
        log.info("RSEs and users will be configured in Rucio for the VO:", vo)

    return clientConfig
if not (resR['OK'] or resW['OK'] or resC['OK']): DIRAC.exit(-1) if not (writeAllowed or readAllowed or checkAllowed or removeAllowed): gLogger.info("No storage elements were allowed") DIRAC.exit(-1) if mute: gLogger.notice('Email is muted by script switch') DIRAC.exit(0) subject = '%s storage elements allowed for use' % len( writeAllowed + readAllowed + checkAllowed + removeAllowed) addressPath = 'EMail/Production' address = Operations().getValue(addressPath, '') body = '' if read: body = "%s\n\nThe following storage elements were allowed for reading:" % body for se in readAllowed: body = "%s\n%s" % (body, se) if write: body = "%s\n\nThe following storage elements were allowed for writing:" % body for se in writeAllowed: body = "%s\n%s" % (body, se) if check: body = "%s\n\nThe following storage elements were allowed for checking:" % body for se in checkAllowed: body = "%s\n%s" % (body, se) if remove:
class Matcher:
    """Logic for matching

    Given the description of a resource (as sent by a pilot) and the credentials of the
    requester, selects a job from the task queues and returns what is needed to run it.
    """

    def __init__(self, pilotAgentsDB=None, jobDB=None, tqDB=None, jlDB=None, opsHelper=None, pilotRef=None):
        """c'tor

        Every collaborator that is not passed in (or is falsy) is instantiated here.

        :param pilotAgentsDB: PilotAgentsDB object
        :param jobDB: JobDB object
        :param tqDB: TaskQueueDB object
        :param jlDB: JobLoggingDB object
        :param opsHelper: Operations helper object
        :param pilotRef: pilot reference; when given it is used as prefix of the logger names,
                         and the loggers of the DB objects are replaced accordingly
        """
        self.pilotAgentsDB = pilotAgentsDB if pilotAgentsDB else PilotAgentsDB()
        self.jobDB = jobDB if jobDB else JobDB()
        self.tqDB = tqDB if tqDB else TaskQueueDB()
        self.jlDB = jlDB if jlDB else JobLoggingDB()
        self.opsHelper = opsHelper if opsHelper else Operations()

        if pilotRef:
            loggerName = "[%s]Matcher" % pilotRef
            self.log = gLogger.getSubLogger(loggerName)
            self.pilotAgentsDB.log = gLogger.getSubLogger(loggerName)
            self.jobDB.log = gLogger.getSubLogger(loggerName)
            self.tqDB.log = gLogger.getSubLogger(loggerName)
            self.jlDB.log = gLogger.getSubLogger(loggerName)
        else:
            self.log = gLogger.getSubLogger("Matcher")

        self.limiter = Limiter(jobDB=self.jobDB, opsHelper=self.opsHelper, pilotRef=pilotRef)

        self.siteClient = SiteStatus()

    def selectJob(self, resourceDescription, credDict):
        """Main job selection function to find the highest priority job matching the resource capacity

        :param resourceDescription: description of the resource asking for a job
        :param dict credDict: credentials of the requester
        :return: empty dict when nothing matches, otherwise a dict holding at least
                 JDL, JobID, DN, Group and PilotInfoReportedFlag
        :raises RuntimeError: when one of the DB operations fails, or when the matched job
                              is not in Waiting state
        """
        startTime = time.time()

        resourceDict = self._getResourceDict(resourceDescription, credDict)

        # Make a nice print of the resource matching parameters
        toPrintDict = dict(resourceDict)
        if "MaxRAM" in resourceDescription:
            toPrintDict["MaxRAM"] = resourceDescription["MaxRAM"]
        if "NumberOfProcessors" in resourceDescription:
            toPrintDict["NumberOfProcessors"] = resourceDescription["NumberOfProcessors"]
        # The tags generated from MaxRAM / NumberOfProcessors are left out of the print
        toPrintDict["Tag"] = [
            tag for tag in resourceDict.get("Tag", []) if not tag.endswith("GB") and not tag.endswith("Processors")
        ]
        if not toPrintDict["Tag"]:
            toPrintDict.pop("Tag")
        self.log.info("Resource description for matching", printDict(toPrintDict))

        negativeCond = self.limiter.getNegativeCondForSite(resourceDict["Site"], resourceDict.get("GridCE"))
        result = self.tqDB.matchAndGetJob(resourceDict, negativeCond=negativeCond)

        if not result["OK"]:
            raise RuntimeError(result["Message"])
        result = result["Value"]
        if not result["matchFound"]:
            self.log.info("No match found")
            return {}

        jobID = result["jobId"]
        resAtt = self.jobDB.getJobAttributes(jobID, ["OwnerDN", "OwnerGroup", "Status"])
        if not resAtt["OK"]:
            raise RuntimeError("Could not retrieve job attributes")
        if not resAtt["Value"]:
            raise RuntimeError("No attributes returned for job")
        if resAtt["Value"]["Status"] != "Waiting":
            # The job should not be in the task queue anymore: remove it and give up
            self.log.error("Job matched by the TQ is not in Waiting state", str(jobID))
            result = self.tqDB.deleteJob(jobID)
            if not result["OK"]:
                raise RuntimeError(result["Message"])
            raise RuntimeError("Job %s is not in Waiting state" % str(jobID))

        self._reportStatus(resourceDict, jobID)

        result = self.jobDB.getJobJDL(jobID)
        if not result["OK"]:
            raise RuntimeError("Failed to get the job JDL")

        resultDict = {}
        resultDict["JDL"] = result["Value"]
        resultDict["JobID"] = jobID

        matchTime = time.time() - startTime
        self.log.verbose("Match time", "[%s]" % str(matchTime))

        # Get some extra stuff into the response returned
        resOpt = self.jobDB.getJobOptParameters(jobID)
        if resOpt["OK"]:
            resultDict.update(resOpt["Value"])
        resAtt = self.jobDB.getJobAttributes(jobID, ["OwnerDN", "OwnerGroup"])
        if not resAtt["OK"]:
            raise RuntimeError("Could not retrieve job attributes")
        if not resAtt["Value"]:
            raise RuntimeError("No attributes returned for job")

        if self.opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
            self.limiter.updateDelayCounters(resourceDict["Site"], jobID)

        # The pilot info is reported only once, the pilot-job mapping at every match
        if not resourceDict.get("PilotInfoReportedFlag", False):
            self._updatePilotInfo(resourceDict)
        self._updatePilotJobMapping(resourceDict, jobID)

        resultDict["DN"] = resAtt["Value"]["OwnerDN"]
        resultDict["Group"] = resAtt["Value"]["OwnerGroup"]
        resultDict["PilotInfoReportedFlag"] = True

        return resultDict

    def _getResourceDict(self, resourceDescription, credDict):
        """from resourceDescription to resourceDict (just various mods)

        :param resourceDescription: description of the resource asking for a job
        :param dict credDict: credentials of the requester
        :return: the resource dictionary used for the matching
        """
        resourceDict = self._processResourceDescription(resourceDescription)
        resourceDict = self._checkCredentials(resourceDict, credDict)
        self._checkPilotVersion(resourceDict)
        if not self._checkMask(resourceDict):
            # Banned destinations can only take Test jobs
            resourceDict["JobType"] = "Test"

        self.log.verbose("Resource description")
        for key in resourceDict:
            self.log.debug("%s : %s" % (key.rjust(20), resourceDict[key]))

        return resourceDict

    @staticmethod
    def _getCapacityTags(resourceDescription):
        """Convert the MaxRAM and NumberOfProcessors parameters into a list of tags

        MaxRAM is divided by 1000 and yields the tags "2GB", "3GB", ... up to that value;
        NumberOfProcessors yields "2Processors", "3Processors", ... plus "MultiProcessor"
        when it is larger than 1. Values above 1024 generate no numbered tags.

        Values that cannot be interpreted as numbers are ignored: numeric strings are
        accepted, anything else (non numeric strings, containers, ...) is skipped
        instead of raising.

        :param resourceDescription: mapping that may hold "MaxRAM" and "NumberOfProcessors"
        :return: list of tags (possibly empty)
        """
        maxRAM = resourceDescription.get("MaxRAM")
        if maxRAM:
            try:
                # A string cannot be divided: convert it first (previously an uncaught TypeError)
                if isinstance(maxRAM, str):
                    maxRAM = float(maxRAM)
                maxRAM = int(maxRAM / 1000)
            except (TypeError, ValueError, OverflowError):
                maxRAM = None

        nProcessors = resourceDescription.get("NumberOfProcessors")
        if nProcessors:
            try:
                nProcessors = int(nProcessors)
            except (TypeError, ValueError, OverflowError):
                nProcessors = None

        capacityTags = []
        for param, key in ((maxRAM, "GB"), (nProcessors, "Processors")):
            if param and param <= 1024:
                capacityTags.extend("%d%s" % (par, key) for par in range(2, param + 1))

        # Add 'MultiProcessor' to the list of tags
        if nProcessors and nProcessors > 1:
            capacityTags.append("MultiProcessor")

        return capacityTags

    def _processResourceDescription(self, resourceDescription):
        """Check and form the resource description dictionary

        :param resourceDescription: a ceDict coming from a JobAgent, for example.
        :return: updated dictionary of resource description parameters
        """
        resourceDict = {}
        for name in singleValueDefFields:
            if name in resourceDescription:
                resourceDict[name] = resourceDescription[name]

        for name in multiValueMatchFields:
            if name in resourceDescription:
                resourceDict[name] = resourceDescription[name]

        if resourceDescription.get("Tag"):
            tags = resourceDescription["Tag"]
            # A list is copied, so that the tags added below do not end up in the caller's list.
            # A string of the form "[a, b]" is split into its unique elements.
            resourceDict["Tag"] = (
                list(tags)
                if isinstance(tags, list)
                else list({tag.strip("\"' ") for tag in tags.strip("[]").split(",")})
            )
        if "RequiredTag" in resourceDescription:
            requiredTagsList = (
                list({tag.strip("\"' ") for tag in resourceDescription["RequiredTag"].strip("[]").split(",")})
                if isinstance(resourceDescription["RequiredTag"], str)
                else resourceDescription["RequiredTag"]
            )
            resourceDict["RequiredTag"] = requiredTagsList

        if "JobID" in resourceDescription:
            resourceDict["JobID"] = resourceDescription["JobID"]

        # Convert MaxRAM and NumberOfProcessors parameters into a list of tags
        capacityTags = self._getCapacityTags(resourceDescription)
        if capacityTags:
            resourceDict.setdefault("Tag", []).extend(capacityTags)

        # Add 'WholeNode' to the list of tags
        if "WholeNode" in resourceDescription:
            resourceDict.setdefault("Tag", []).append("WholeNode")

        # Remove the duplicates
        if "Tag" in resourceDict:
            resourceDict["Tag"] = list(set(resourceDict["Tag"]))
        if "RequiredTag" in resourceDict:
            resourceDict["RequiredTag"] = list(set(resourceDict["RequiredTag"]))

        for k in (
            "DIRACVersion",
            "ReleaseVersion",
            "ReleaseProject",
            "VirtualOrganization",
            "PilotReference",
            "PilotBenchmark",
            "PilotInfoReportedFlag",
        ):
            if k in resourceDescription:
                resourceDict[k] = resourceDescription[k]

        return resourceDict

    def _reportStatus(self, resourceDict, jobID):
        """Reports the status of the matched job in jobDB and jobLoggingDB

        Do not fail if errors happen here

        :param dict resourceDict: resource dictionary, its "Site" is set as the job site
        :param jobID: ID of the matched job
        """
        attNames = ["Status", "MinorStatus", "ApplicationStatus", "Site"]
        attValues = ["Matched", "Assigned", "Unknown", resourceDict["Site"]]
        result = self.jobDB.setJobAttributes(jobID, attNames, attValues)
        if not result["OK"]:
            self.log.error(
                "Problem reporting job status", "setJobAttributes, jobID = %s: %s" % (jobID, result["Message"])
            )
        else:
            self.log.verbose("Set job attributes for jobID", jobID)

        result = self.jlDB.addLoggingRecord(jobID, status=JobStatus.MATCHED, minorStatus="Assigned", source="Matcher")
        if not result["OK"]:
            self.log.error(
                "Problem reporting job status", "addLoggingRecord, jobID = %s: %s" % (jobID, result["Message"])
            )
        else:
            self.log.verbose("Added logging record for jobID", jobID)

    def _checkMask(self, resourceDict):
        """Check the mask: are we allowed to run normal jobs?

        FIXME: should we move to site OR SE?

        :param dict resourceDict: resource dictionary, must hold the "Site"
        :return: True if the site is among the usable ones, False otherwise
        :raises RuntimeError: when the site is missing or the usable sites cannot be retrieved
        """
        if "Site" not in resourceDict:
            self.log.error("Missing Site Name in Resource JDL")
            raise RuntimeError("Missing Site Name in Resource JDL")

        # Check if site is allowed
        result = self.siteClient.getUsableSites(resourceDict["Site"])
        if not result["OK"]:
            self.log.error("Internal error", "siteClient.getUsableSites: %s" % result["Message"])
            raise RuntimeError("Internal error")

        return resourceDict["Site"] in result["Value"]

    def _updatePilotInfo(self, resourceDict):
        """Update pilot information - do not fail if we don't manage to do it

        :param dict resourceDict: resource dictionary; nothing is done when its
                                  "PilotReference" is missing, empty or "Unknown"
        """
        pilotReference = resourceDict.get("PilotReference", "")
        if pilotReference and pilotReference != "Unknown":
            gridCE = resourceDict.get("GridCE", "Unknown")
            site = resourceDict.get("Site", "Unknown")
            benchmark = resourceDict.get("PilotBenchmark", 0.0)
            self.log.verbose(
                "Reporting pilot info",
                "for %s: gridCE=%s, site=%s, benchmark=%f" % (pilotReference, gridCE, site, benchmark),
            )

            result = self.pilotAgentsDB.setPilotStatus(
                pilotReference, status=PilotStatus.RUNNING, gridSite=site, destination=gridCE, benchmark=benchmark
            )
            if not result["OK"]:
                self.log.warn(
                    "Problem updating pilot information",
                    "; setPilotStatus. pilotReference: %s; %s" % (pilotReference, result["Message"]),
                )

    def _updatePilotJobMapping(self, resourceDict, jobID):
        """Update pilot to job mapping information

        Failures are only logged.

        :param dict resourceDict: resource dictionary; nothing is done when its
                                  "PilotReference" is missing, empty or "Unknown"
        :param jobID: ID of the matched job
        """
        pilotReference = resourceDict.get("PilotReference", "")
        if pilotReference and pilotReference != "Unknown":
            result = self.pilotAgentsDB.setCurrentJobID(pilotReference, jobID)
            if not result["OK"]:
                self.log.error(
                    "Problem updating pilot information",
                    ";setCurrentJobID. pilotReference: %s; %s" % (pilotReference, result["Message"]),
                )
            result = self.pilotAgentsDB.setJobForPilot(jobID, pilotReference, updateStatus=False)
            if not result["OK"]:
                self.log.error(
                    "Problem updating pilot information",
                    "; setJobForPilot. pilotReference: %s; %s" % (pilotReference, result["Message"]),
                )

    def _checkCredentials(self, resourceDict, credDict):
        """Check if we can get a job given the passed credentials

        The owner (DN and/or group) of the jobs that can be matched is set in
        resourceDict according to the properties of the credentials.

        :param dict resourceDict: resource dictionary, updated in place
        :param dict credDict: credentials, with the keys "properties", "group" and "DN"
        :return: the updated resourceDict
        :raises RuntimeError: when the Registry cannot be queried
        """
        if Properties.GENERIC_PILOT in credDict["properties"]:
            # You can only match groups in the same VO
            if credDict["group"] == "hosts":
                # for the host case the VirtualOrganization parameter
                # is mandatory in resourceDict
                vo = resourceDict.get("VirtualOrganization", "")
            else:
                vo = Registry.getVOForGroup(credDict["group"])
            if "OwnerGroup" not in resourceDict:
                result = Registry.getGroupsForVO(vo)
                if not result["OK"]:
                    raise RuntimeError(result["Message"])
                resourceDict["OwnerGroup"] = result["Value"]
        else:
            # If it's a private pilot, the DN has to be the same
            if Properties.PILOT in credDict["properties"]:
                self.log.notice("Setting the resource DN to the credentials DN")
                resourceDict["OwnerDN"] = credDict["DN"]
            # If it's a job sharing. The group has to be the same and just check that the DN (if any)
            # belongs to the same group
            elif Properties.JOB_SHARING in credDict["properties"]:
                resourceDict["OwnerGroup"] = credDict["group"]
                self.log.notice("Setting the resource group to the credentials group")
                if "OwnerDN" in resourceDict and resourceDict["OwnerDN"] != credDict["DN"]:
                    ownerDN = resourceDict["OwnerDN"]
                    result = Registry.getGroupsForDN(resourceDict["OwnerDN"])
                    if not result["OK"]:
                        raise RuntimeError(result["Message"])
                    if credDict["group"] not in result["Value"]:
                        # DN is not in the same group! bad boy.
                        self.log.warn(
                            "You cannot request jobs from this DN, as it does not belong to your group!",
                            "(%s)" % ownerDN,
                        )
                        resourceDict["OwnerDN"] = credDict["DN"]
            # Nothing special, group and DN have to be the same
            else:
                resourceDict["OwnerDN"] = credDict["DN"]
                resourceDict["OwnerGroup"] = credDict["group"]

        return resourceDict

    def _checkPilotVersion(self, resourceDict):
        """Check the pilot DIRAC version

        Nothing is checked when the Operations option Pilot/CheckVersion is False.

        :param dict resourceDict: resource dictionary, holding "ReleaseVersion" (or
                                  "DIRACVersion") and possibly "ReleaseProject"
        :raises PilotVersionError: when the version or the project of the pilot is not provided
                                   or does not match what is set in Operations
        """
        if self.opsHelper.getValue("Pilot/CheckVersion", True):
            # ReleaseVersion has precedence over DIRACVersion
            if "ReleaseVersion" in resourceDict:
                pilotVersion = resourceDict["ReleaseVersion"]
            elif "DIRACVersion" in resourceDict:
                pilotVersion = resourceDict["DIRACVersion"]
            else:
                raise PilotVersionError("Version check requested and not provided by Pilot")

            validVersions = [
                convertToPy3VersionNumber(newStyleVersion)
                for newStyleVersion in self.opsHelper.getValue("Pilot/Version", [])
            ]
            if validVersions and convertToPy3VersionNumber(pilotVersion) not in validVersions:
                raise PilotVersionError(
                    "Pilot version does not match the production version: %s not in ( %s )"
                    % (pilotVersion, ",".join(validVersions))
                )
            # Check project if requested
            validProject = self.opsHelper.getValue("Pilot/Project", "")
            if validProject:
                if "ReleaseProject" not in resourceDict:
                    raise PilotVersionError(
                        "Version check requested but expected project %s not received" % validProject
                    )
                if resourceDict["ReleaseProject"] != validProject:
                    raise PilotVersionError(
                        "Version check requested but expected project %s != received %s"
                        % (validProject, resourceDict["ReleaseProject"])
                    )
class Transformation( API ):
  """ Client side representation of a transformation.

      The parameters of the transformation are kept in self.paramValues and are read
      and written through dynamically generated getXXX() / setXXX() methods (see __getattr__).
      When the transformation exists in the database, a changed parameter is also
      propagated through the TransformationClient.
  """

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor

        :param transID: ID of an existing transformation to load, 0 for a new definition
        :param transClient: TransformationClient to use, a new one is created when not given
        :raises AttributeError: if transID is given and the transformation does not exist
    """
    super( Transformation, self ).__init__()

    # Allowed types of the standard parameters
    self.paramTypes = { 'TransformationID'    : [types.IntType, types.LongType],
                        'TransformationName'  : types.StringTypes,
                        'Status'              : types.StringTypes,
                        'Description'         : types.StringTypes,
                        'LongDescription'     : types.StringTypes,
                        'Type'                : types.StringTypes,
                        'Plugin'              : types.StringTypes,
                        'AgentType'           : types.StringTypes,
                        'FileMask'            : types.StringTypes,
                        'TransformationGroup' : types.StringTypes,
                        'GroupSize'           : [types.IntType, types.LongType, types.FloatType],
                        'InheritedFrom'       : [types.IntType, types.LongType],
                        'Body'                : types.StringTypes,
                        'MaxNumberOfTasks'    : [types.IntType, types.LongType],
                        'EventsPerTask'       : [types.IntType, types.LongType]}
    # Current values of the parameters (initialised with the defaults)
    self.paramValues = { 'TransformationID'    : 0,
                         'TransformationName'  : '',
                         'Status'              : 'New',
                         'Description'         : '',
                         'LongDescription'     : '',
                         'Type'                : '',
                         'Plugin'              : 'Standard',
                         'AgentType'           : 'Manual',
                         'FileMask'            : '',
                         'TransformationGroup' : 'General',
                         'GroupSize'           : 1,
                         'InheritedFrom'       : 0,
                         'Body'                : '',
                         'MaxNumberOfTasks'    : 0,
                         'EventsPerTask'       : 0}
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                               ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError( 'TransformationID %d does not exist' % transID )
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                                   self.transClient.serverURL ) )

  def setServer( self, server ):
    """ Set the URL of the server, here and in the transformation client """
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    """ Get the URL of the server in use """
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-initialise the object (optionally loading transID), keeping the same server URL """
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Set the TargetSE parameter, after checking that the SEs are known """
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    """ Set the SourceSE parameter, after checking that the SEs are known """
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, se, seList ):
    """ Check the SEs and store them as the parameter named se

        :param se: name of the parameter ('TargetSE' or 'SourceSE')
        :param seList: list of SE names, or a string holding either the representation
                       of a python list or a comma / blank separated list of names
    """
    if type( seList ) in types.StringTypes:
      # NOTE(review): eval() executes whatever expression the string contains. It is kept not to
      # change what is accepted, but must never be fed untrusted input (consider ast.literal_eval).
      try:
        seList = eval( seList )
      except Exception:
        # Not a python expression: take it as a list of names
        seList = seList.replace( ',', ' ' ).split()
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = se
    return self.__setParam( seList )

  def __getattr__( self, name ):
    """ Provide the getXXX() and setXXX() methods for any parameter XXX.

        The name of the parameter is remembered in self.item_called,
        and is used by the returned method when this is invoked.
    """
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    """ Getter: 'Available' gives the names of the standard parameters, 'Parameters'
        the dictionary of all the values, anything else the value of that parameter.

        :raises AttributeError: if the parameter is not known
    """
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called )

  def __setParam( self, value ):
    """ Setter of the parameter named self.item_called.

        The value is changed (also in the database, when the transformation exists) only if
        it differs from the current one.

        :raises TypeError: if the type of the new value of a standard parameter is not allowed
    """
    change = False
    if self.item_called in self.paramTypes:
      # Standard parameter: the type is checked
      oldValue = self.paramValues[self.item_called]
      if oldValue != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ),
                                                             self.paramTypes[self.item_called] ) )
    else:
      # Additional parameter: anything goes
      if self.item_called not in self.paramValues:
        change = True
      else:
        oldValue = self.paramValues[self.item_called]
        if oldValue != value:
          change = True
    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
    else:
      gLogger.verbose( "Parameter %s to be changed" % self.item_called )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, self.item_called, value )
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Get the parameters of the transformation from the server, and set them in this object

        :return: S_OK( dictionary of the parameters ) / S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setter = None
      setterName = "set%s" % paramName
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      # It is the setter that may be missing (the name is never empty)
      if not setter:
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Get the logging records of the transformation, optionally printing them """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  # The methods below forward the call to the method of the TransformationClient with
  # the same name, with the ID of this transformation as first argument.

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    """ As above; in case of success the local Status is also set to 'Cleaned' """
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    """ As above; in case of success this object is also reset """
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    """ Invoke the method of the TransformationClient named operation for this transformation

        :param operation: name of the method of the TransformationClient
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments passed to the method; printOutput (default False)
                     is not passed, it tells whether the result has to be printed
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    # Do not fail with a KeyError if printOutput was not given
    printOutput = kwds.pop( 'printOutput', False )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" % operation )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ):
    """ Get the files of the transformation, optionally selected by status and / or LFN

        :param fileStatus: status(es) to select
        :param lfns: LFN(s) to select
        :param outputFields: fields to print; if empty the available ones are printed
        :param orderBy: field used to order the printout
        :param printOutput: whether to print the result
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ):
    """ Get the tasks of the transformation, optionally selected by external status and / or ID

        :param taskStatus: external status(es) to select
        :param taskIDs: ID(s) of the tasks to select
        :param outputFields: fields to print; if empty the available ones are printed
        :param orderBy: field used to order the printout
        :param printOutput: whether to print the result
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ):
    """ Get the transformations, optionally selected by ID and / or status

        :param transID: ID(s) of the transformations to select
        :param transStatus: status(es) to select
        :param outputFields: fields to print; if empty the available ones are printed
        :param orderBy: field used to order the printout
        :param printOutput: whether to print the result
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    """ Create the transformation in the database, using the parameters set in this object.

        The parameters that are not among the standard ones are added one by one
        after the creation; a failure in adding them is only reported.

        :return: S_OK( transformation ID ) / S_ERROR
    """
    res = self._checkCreation()
    if not res['OK']:
      return self._errorReport( res, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    res = self.transClient.addTransformation( self.paramValues['TransformationName'],
                                              self.paramValues['Description'],
                                              self.paramValues['LongDescription'],
                                              self.paramValues['Type'],
                                              self.paramValues['Plugin'],
                                              self.paramValues['AgentType'],
                                              self.paramValues['FileMask'],
                                              transformationGroup = self.paramValues['TransformationGroup'],
                                              groupSize = self.paramValues['GroupSize'],
                                              inheritedFrom = self.paramValues['InheritedFrom'],
                                              body = self.paramValues['Body'],
                                              maxTasks = self.paramValues['MaxNumberOfTasks'],
                                              eventsPerTask = self.paramValues['EventsPerTask'],
                                              addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transID = res['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.notice( "Created transformation %d" % transID )
    for paramName, paramValue in self.paramValues.items():
      if paramName not in self.paramTypes:
        res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
        if not res['OK']:
          gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
          gLogger.notice( "To add this parameter later please execute the following." )
          gLogger.notice( "oTransformation = Transformation(%d)" % transID )
          gLogger.notice( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Check that the transformation can be created: no TransformationID must be set,
        the required parameters are asked interactively if missing, as is the plugin
        when it is not among the supported ones.
    """
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      return S_ERROR()

    requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type']
    for parameter in requiredParameters:
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " )

    plugin = self.paramValues['Plugin']
    if plugin not in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
      res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not res['OK']:
        return res
      self.paramValues['Plugin'] = res['Value']

    # The plugin specific checks (the _check<Plugin>Plugin methods) are currently not invoked here
    return S_OK()

  def _checkBySizePlugin( self ):
    """ Same check as for the Standard plugin """
    return self._checkStandardPlugin()

  def _checkBySharePlugin( self ):
    """ Same check as for the Standard plugin """
    return self._checkStandardPlugin()

  def _checkStandardPlugin( self ):
    """ Make sure that the GroupSize is positive, setting it to 1 if it is not """
    groupSize = self.paramValues['GroupSize']
    if ( groupSize <= 0 ):
      gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
      res = self.setGroupSize( 1 )
      if not res['OK']:
        return res
    return S_OK()

  def _checkBroadcastPlugin( self ):
    """ Make sure that SourceSE and TargetSE are set, asking for them interactively if not """
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( ', '.join( ['SourceSE', 'TargetSE'] ) ) )
    requiredParams = ['SourceSE', 'TargetSE']
    for requiredParam in requiredParams:
      if ( requiredParam not in self.paramValues ) or ( not self.paramValues[requiredParam] ):
        paramValue = raw_input( "Please enter " + requiredParam + " " )
        setter = None
        setterName = "set%s" % requiredParam
        if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
          setter = getattr( self, setterName )
        if not setter:
          return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
        ses = paramValue.replace( ',', ' ' ).split()
        res = setter( ses )
        if not res['OK']:
          return res
    return S_OK()

  def __checkSEs( self, seList ):
    """ Check that all the SEs in seList are among the eligible 'Storage' resources """
    resources = Resources()
    res = resources.getEligibleResources( 'Storage' )
    if not res['OK']:
      return self._errorReport( res, 'Failed to get possible StorageElements' )
    missing = []
    for se in seList:
      if se not in res['Value']:
        gLogger.error( "StorageElement %s is not known" % se )
        missing.append( se )
    if missing:
      return S_ERROR( "%d StorageElements not known" % len( missing ) )
    return S_OK()

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    """ Ask the user for the value of a parameter

        :param parameter: name of the parameter
        :param choices: allowed values
        :param default: default value
        :param insert: whether the value has also to be set, using the setter of the parameter
        :return: S_OK( value ) / S_ERROR
    """
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    gLogger.notice( "%s will be set to '%s'" % ( parameter, res['Value'] ) )
    paramValue = res['Value']
    if insert:
      setter = None
      setterName = "set%s" % parameter
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setter:
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
def __init__(self, agentName, loadName, baseAgentName=False, properties={}):
    """
    Common __init__ method for all Agents.
    All Agent modules must define: __doc__

    They are used to populate __codeProperties

    The following Options are used from the Configuration:

    - /DIRAC/Setup
    - Status
    - Enabled
    - PollingTime            default = 120
    - MaxCycles              default = 500
    - WatchdogTime           default = 0 (disabled)
    - ControlDirectory       control/SystemName/AgentName
    - WorkDirectory          work/SystemName/AgentName
    - shifterProxy           ''
    - shifterProxyLocation   WorkDirectory/SystemName/AgentName/.shifterCred

    It defines the following default Options that can be set via Configuration (above):

    - MonitoringEnabled     True
    - Enabled               True if Status == Active
    - PollingTime           120
    - MaxCycles             500
    - ControlDirectory      control/SystemName/AgentName
    - WorkDirectory         work/SystemName/AgentName
    - shifterProxy          False
    - shifterProxyLocation  work/SystemName/AgentName/.shifterCred

    different defaults can be set in the initialize() method of the Agent using am_setOption()

    In order to get a shifter proxy in the environment during the execute()
    the configuration Option 'shifterProxy' must be set, a default may be given
    in the initialize() method.

    :param str agentName: full name of the agent, in the form "System/AgentName"
    :param str loadName: name of the module that was loaded, used for the "loadSection"
    :param baseAgentName: not used by this constructor
    :param dict properties: extra module properties, they override the ones defined here
    """
    self.log = gLogger.getSubLogger(agentName)

    self.__basePath = rootPath
    self.__agentModule = None
    self.agentName = agentName
    self.__codeProperties = {}
    self.__getCodeInfo()

    self.__moduleProperties = {
        "fullName": agentName,
        "loadName": loadName,
        "section": PathFinder.getAgentSection(agentName),
        "loadSection": PathFinder.getAgentSection(loadName),
        "cyclesDone": 0,
        "totalElapsedTime": 0,
        "setup": gConfig.getValue("/DIRAC/Setup", "Unknown"),
        "alive": True,
    }
    self.__moduleProperties["system"], self.__moduleProperties["agentName"] = agentName.split("/")

    self.__configDefaults = {}
    self.__configDefaults["MonitoringEnabled"] = True
    # Exact (case insensitive) comparison: `in ("active")` is a substring test on a string,
    # not a membership test on a tuple, and would enable the agent also for "" or "act".
    self.__configDefaults["Enabled"] = self.am_getOption("Status", "Active").lower() == "active"
    self.__configDefaults["PollingTime"] = self.am_getOption("PollingTime", 120)
    self.__configDefaults["MaxCycles"] = self.am_getOption("MaxCycles", 500)
    self.__configDefaults["WatchdogTime"] = self.am_getOption("WatchdogTime", 0)
    self.__configDefaults["ControlDirectory"] = os.path.join(self.__basePath, "control", *agentName.split("/"))
    self.__configDefaults["WorkDirectory"] = os.path.join(self.__basePath, "work", *agentName.split("/"))
    self.__configDefaults["shifterProxy"] = ""
    self.__configDefaults["shifterProxyLocation"] = os.path.join(
        self.__configDefaults["WorkDirectory"], ".shifterCred"
    )

    # Anything that is not a dictionary is silently ignored
    if isinstance(properties, dict):
        for key in properties:
            self.__moduleProperties[key] = properties[key]
    # These two are set after the properties, so they cannot be overridden through them
    self.__moduleProperties["executors"] = [(self.execute, ())]
    self.__moduleProperties["shifterProxy"] = False

    self.__monitorLastStatsUpdate = -1
    self.activityMonitoring = False
    # Check if monitoring is enabled
    if "Monitoring" in Operations().getMonitoringBackends(monitoringType="AgentMonitoring"):
        self.activityMonitoring = True
def addShifter(self, shifters=None):
    """
    Adds or modify one or more shifters. Also, adds the shifter section in case this is not present.
    Shifter identities are used in several places, mostly for running agents

    shifters should be in the form {'ShifterRole':{'User':'aUserName', 'Group':'aDIRACGroup'}}

    The ``shifters`` argument is not modified: roles whose current definition
    already equals the requested one are filtered into a separate dict.

    :param dict shifters: requested shifter definitions, keyed by role
    :return: S_OK/S_ERROR
    """

    def getOpsSection():
        """
        Where is the shifters section?

        Returns S_OK(path) for the first candidate section that exists,
        S_ERROR("No shifter section") otherwise.
        """
        vo = CSGlobals.getVO()
        setup = CSGlobals.getSetup()

        if vo:
            candidates = ['/Operations/%s/%s/Shifter' % (vo, setup),
                          '/Operations/%s/Defaults/Shifter' % vo]
        else:
            candidates = ['/Operations/%s/Shifter' % setup,
                          '/Operations/Defaults/Shifter']

        for candidate in candidates:
            res = gConfig.getSections(candidate)
            if res['OK']:
                return S_OK(candidate)

        return S_ERROR("No shifter section")

    if shifters is None:
        shifters = {}

    if not self.__initialized['OK']:
        return self.__initialized

    # get current shifters
    opsH = Operations()
    currentShifterRoles = opsH.getSections('Shifter')
    if not currentShifterRoles['OK']:
        # we assume the shifter section is not present
        currentShifterRoles = []
    else:
        currentShifterRoles = currentShifterRoles['Value']
    currentShiftersDict = {}
    for currentShifterRole in currentShifterRoles:
        currentShifter = opsH.getOptionsDict('Shifter/%s' % currentShifterRole)
        if not currentShifter['OK']:
            return currentShifter
        currentShiftersDict[currentShifterRole] = currentShifter['Value']

    # Keep only the shifters that actually need to be changed. A new dict is
    # built because popping from `shifters` while iterating over it raises
    # RuntimeError (and would also alter the caller's dictionary).
    shiftersToSet = {}
    for sRole in shifters:
        if sRole in currentShiftersDict and currentShiftersDict[sRole] == shifters[sRole]:
            continue
        shiftersToSet[sRole] = shifters[sRole]

    # get shifters section to modify
    section = getOpsSection()

    # Is this section present?
    if not section['OK']:
        if section['Message'] == "No shifter section":
            gLogger.warn(section['Message'])
            gLogger.info("Adding shifter section")
            vo = CSGlobals.getVO()
            if vo:
                section = '/Operations/%s/Defaults/Shifter' % vo
            else:
                section = '/Operations/Defaults/Shifter'
            res = self.__csMod.createSection(section)
            if not res:
                gLogger.error("Section %s not created" % section)
                return S_ERROR("Section %s not created" % section)
        else:
            gLogger.error(section['Message'])
            return section
    else:
        section = section['Value']

    # add or modify shifters
    for shifter in shiftersToSet:
        shifterPath = section + '/' + shifter
        # The role is dropped and recreated from scratch
        self.__csMod.removeSection(shifterPath)
        self.__csMod.createSection(shifterPath)
        self.__csMod.createSection(shifterPath + '/' + 'User')
        self.__csMod.createSection(shifterPath + '/' + 'Group')
        self.__csMod.setOptionValue(shifterPath + '/' + 'User', shiftersToSet[shifter]['User'])
        self.__csMod.setOptionValue(shifterPath + '/' + 'Group', shiftersToSet[shifter]['Group'])

    self.csModified = True
    return S_OK(True)
def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
    """
    Build the command line options of the pilots to submit for a TaskQueue.

    :param dict taskQueueDict: TaskQueue description; the keys read here are
        'TaskQueueID', 'OwnerDN', 'OwnerGroup', 'Setup', 'CPUTime' and,
        optionally, 'PilotTypes' and 'ForceGeneric'
    :param int pilotsToSubmit: number of pilots requested
    :return: S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
             submitPrivatePilot, privateTQ)) or S_ERROR when the generic
             credentials or the proxy token cannot be obtained
    """
    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ('PilotTypes' in taskQueueDict
                 and 'private' in [t.lower() for t in taskQueueDict['PilotTypes']])
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
    if submitPrivatePilot:
        self.log.verbose('Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
        ownerDN = taskQueueDict['OwnerDN']
        ownerGroup = taskQueueDict['OwnerGroup']
        # User Group requirement
        pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
        # check if group allows jobsharing
        ownerGroupProperties = getPropertiesForGroup(ownerGroup)
        if 'JobSharing' not in ownerGroupProperties:
            # Add Owner requirement to pilot
            pilotOptions.append("-O '%s'" % ownerDN)
        if privateTQ:
            pilotOptions.append('-o /Resources/Computing/CEDefaults/PilotType=private')
        maxJobsInFillMode = self.maxJobsInFillMode
    else:
        # For generic jobs we'll submit mixture of generic and private pilots
        self.log.verbose('Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
        # ADRI: Find the generic group
        result = findGenericPilotCredentials(group=taskQueueDict['OwnerGroup'])
        if not result['OK']:
            self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
            return S_ERROR(ERROR_GENERIC_CREDENTIALS)
        ownerDN, ownerGroup = result['Value']

        result = gProxyManager.requestToken(ownerDN, ownerGroup,
                                            max(pilotsToSubmit, self.maxJobsInFillMode))
        if not result['OK']:
            self.log.error(ERROR_TOKEN, result['Message'])
            return S_ERROR(ERROR_TOKEN)
        (token, numberOfUses) = result['Value']
        pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

        pilotOptions.append('-o /Security/ProxyToken=%s' % token)

        # Ceiling of pilotsToSubmit / maxJobsInFillMode. Floor division keeps
        # the pilot count an integer on Python 3 as well (true division would
        # yield a float here).
        pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

        maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
    # Use Filling mode
    pilotOptions.append('-M %s' % maxJobsInFillMode)

    # Debug
    pilotOptions.append('-d')
    # Setup.
    pilotOptions.append('-S %s' % taskQueueDict['Setup'])
    # CS Servers
    csServers = gConfig.getServersList()
    if len(csServers) > 3:
        # Remove the master
        master = gConfigurationData.getMasterServer()
        if master in csServers:
            csServers.remove(master)
    pilotOptions.append('-C %s' % ",".join(csServers))
    # DIRAC Extensions to be used in pilots
    # NOTE (ubeda): a setup-agnostic Operations helper is used here; it is not
    # clear whether the group/setup specific helper created below could be used instead.
    pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
    extensionsList = []
    if pilotExtensionsList:
        # A first element equal to the string 'None' means "no extensions"
        if pilotExtensionsList[0] != 'None':
            extensionsList = pilotExtensionsList
    else:
        extensionsList = getCSExtensions()
    if extensionsList:
        pilotOptions.append('-e %s' % ",".join(extensionsList))

    # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations(group=taskQueueDict['OwnerGroup'], setup=taskQueueDict['Setup'])
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
    pilotOptions.append('-r %s' % version)
    # Requested Project to install
    installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
    if installProject:
        pilotOptions.append('-l %s' % installProject)
    installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
    if installation:
        pilotOptions.append("-V %s" % installation)
    # Requested CPU time
    pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

    if self.submitPoolOption not in self.extraPilotOptions:
        pilotOptions.append(self.submitPoolOption)

    if self.extraPilotOptions:
        pilotOptions.extend(self.extraPilotOptions)

    return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))
def executeForVO(self, vo):
    """
    Execute one SE and user synchronisation cycle for a VO.

    The cycle, in order:

    1. instantiate a Rucio client (user/password first, host cert/key as fallback);
    2. create missing RSEs and align their protocols with the Dirac storage elements;
    3. add distances between RSEs when a new RSE was created;
    4. publish the shares found in the Operations section as RSE attributes;
    5. set the OccupancyLFN and PrimaryDataSE RSE attributes;
    6. create user accounts, identities and scopes, SERVICE accounts for the
       Dirac groups and host identities for the "dirac_srv" account.

    Any exception escaping these steps is logged and converted to S_ERROR.

    :param str vo: Virtual organisation name.

    :return: S_OK or S_ERROR
    :rtype: dict
    """

    def _protocolKey(proto):
        """Identify a protocol description by (scheme, hostname, port, prefix), all as str."""
        return (str(proto["scheme"]), str(proto["hostname"]), str(proto["port"]), str(proto["prefix"]))

    valid_protocols = ["srm", "gsiftp", "davs", "https", "root"]
    default_email = None
    try:
        try:
            client = Client(account="root", auth_type="userpass")
        except Exception as err:
            self.log.info("Login to Rucio as root with password failed. Will try host cert/key", str(err))
            certKeyTuple = Locations.getHostCertificateAndKeyLocation()
            if not certKeyTuple:
                self.log.error("Hostcert/key location not set")
                return S_ERROR("Hostcert/key location not set")
            hostcert, hostkey = certKeyTuple

            self.log.info("Logging in with a host cert/key pair:")
            self.log.debug("account: ", self.clientConfig[vo]["privilegedAccount"])
            self.log.debug("rucio host: ", self.clientConfig[vo]["rucioHost"])
            self.log.debug("auth host: ", self.clientConfig[vo]["authHost"])
            self.log.debug("CA cert path: ", self.caCertPath)
            self.log.debug("Cert location: ", hostcert)
            self.log.debug("Key location: ", hostkey)
            self.log.debug("VO: ", vo)

            client = Client(
                account=self.clientConfig[vo]["privilegedAccount"],
                rucio_host=self.clientConfig[vo]["rucioHost"],
                auth_host=self.clientConfig[vo]["authHost"],
                ca_cert=self.caCertPath,
                auth_type="x509",
                creds={"client_cert": hostcert, "client_key": hostkey},
                timeout=600,
                user_agent="rucio-clients",
                vo=vo,
            )

        self.log.info("Rucio client instantiated for VO:", vo)

        # Get the storage elements from Dirac Configuration and create them in Rucio
        newRSE = False
        self.log.info("Synchronizing SEs for VO ", vo)
        result = getStorageElements(vo)
        if result["OK"]:
            rses = [rse["rse"] for rse in client.list_rses()]
            for se in result["Value"]:
                if se not in rses:
                    # The SE doesn't exist. Will create it
                    newRSE = True
                    self.log.info("Rucio Storage Element does not exist and will be created:", se)
                    try:
                        client.add_rse(rse=se, deterministic=True, volatile=False)
                    except Exception as err:
                        self.log.error("Cannot create RSE", "[RSE: %s, Error: %s]" % (se, str(err)))
                        continue

                    # Add RSE attributes for the new RSE
                    ret = gConfig.getOptionsDict("Resources/FTSEndpoints/FTS3")
                    ftsList = ""
                    if ret["OK"]:
                        ftsList = ",".join(ret["Value"].values())
                    dictRSEAttributes = {"naming_convention": "BelleII", "ANY": True, "fts": ftsList}
                    for key in dictRSEAttributes:
                        self.log.info(
                            "Setting RSE attributes",
                            "[RSE: %s, Attr. name: %s, Value: %s]" % (se, key, dictRSEAttributes[key]),
                        )
                        client.add_rse_attribute(se, key, value=dictRSEAttributes[key])
                    client.set_local_account_limit("root", se, 100000000000000000)

                # Create the protocols
                try:
                    protocols = client.get_protocols(se)
                except RSEProtocolNotSupported as err:
                    self.log.info("Cannot get protocols for", "[RSE %s, Error: %s]" % (se, str(err)))
                    protocols = []
                existing_protocols = [_protocolKey(protocol) for protocol in protocols]
                protocols_to_create = []
                for params in result["Value"][se]:
                    prot = _protocolKey(params)
                    protocols_to_create.append(prot)
                    if prot not in existing_protocols and prot[0] in valid_protocols:
                        # The protocol defined in Dirac does not exist in Rucio. Will be created
                        self.log.info(
                            "Will create new protocol:",
                            "%s://%s:%s%s on %s"
                            % (params["scheme"], params["hostname"], params["port"], params["prefix"], se),
                        )
                        try:
                            client.add_protocol(rse=se, params=params)
                        except Duplicate:
                            self.log.info(
                                "Protocol already exists on", "[RSE: %s, schema:%s]" % (se, params["scheme"])
                            )
                        except Exception as err:
                            self.log.error(
                                "Cannot create protocol on RSE", "[RSE: %s, Error: %s]" % (se, str(err))
                            )
                    else:
                        update = False
                        for protocol in protocols:
                            if prot != _protocolKey(protocol):
                                continue
                            # Check if the protocol defined in Dirac has the same priority as the one defined in Rucio
                            if any(
                                params["domains"][domain][activity] != protocol["domains"][domain][activity]
                                for domain in ("lan", "wan")
                                for activity in ("read", "write", "delete")
                            ):
                                update = True
                            if (
                                params["domains"]["wan"]["third_party_copy"]
                                != protocol["domains"]["wan"]["third_party_copy"]
                            ):
                                update = True
                            if update:
                                data = {
                                    "prefix": params["prefix"],
                                    "read_lan": params["domains"]["lan"]["read"],
                                    "read_wan": params["domains"]["wan"]["read"],
                                    "write_lan": params["domains"]["lan"]["write"],
                                    "write_wan": params["domains"]["wan"]["write"],
                                    "delete_lan": params["domains"]["lan"]["delete"],
                                    "delete_wan": params["domains"]["wan"]["delete"],
                                    # Send the value that is compared above; sending the
                                    # "write" priority would re-trigger the update on every
                                    # cycle whenever the two priorities differ.
                                    "third_party_copy": params["domains"]["wan"]["third_party_copy"],
                                }
                                self.log.info(
                                    "Will update protocol:",
                                    "%s://%s:%s%s on %s"
                                    % (
                                        params["scheme"],
                                        params["hostname"],
                                        params["port"],
                                        params["prefix"],
                                        se,
                                    ),
                                )
                                client.update_protocols(
                                    rse=se,
                                    scheme=params["scheme"],
                                    data=data,
                                    hostname=params["hostname"],
                                    port=params["port"],
                                )
                # Protocols known to Rucio but no longer defined in Dirac are removed
                for prot in existing_protocols:
                    if prot not in protocols_to_create:
                        self.log.info(
                            "Will delete protocol:", "%s://%s:%s%s on %s" % (prot[0], prot[1], prot[2], prot[3], se)
                        )
                        client.delete_protocols(se, scheme=prot[0], hostname=prot[1], port=prot[2])
        else:
            self.log.error("Cannot get SEs:", result["Message"])

        # If new RSE added, add distances
        rses = [rse["rse"] for rse in client.list_rses()]
        if newRSE:
            self.log.info("Adding distances")
            for src_rse, dest_rse in permutations(rses, r=2):
                try:
                    client.add_distance(src_rse, dest_rse, {"ranking": 1, "distance": 10})
                except Exception as err:
                    self.log.error(
                        "Cannot add distance for",
                        "Source RSE: %s, Dest RSE: %s, Error:%s" % (src_rse, dest_rse, str(err)),
                    )

        # Collect the shares from Dirac Configuration and create them in Rucio
        self.log.info("Synchronizing shares")
        result = Operations().getOptionsDict("Production/SEshares")
        if result["OK"]:
            rseDict = result["Value"]
            for rse in rses:
                try:
                    # The detail string is formatted before the call, as in every
                    # other log call of this method.
                    self.log.info(
                        "Setting productionSEshare for", "[RSE: %s : Share: %s]" % (rse, rseDict.get(rse, 0))
                    )
                    client.add_rse_attribute(rse, "productionSEshare", rseDict.get(rse, 0))
                except Exception:
                    self.log.error("Cannot create productionSEshare for RSE:", rse)
        else:
            self.log.error("Cannot get SEs", result["Message"])

        result = Operations().getSections("Shares")
        if result["OK"]:
            for dataLevel in result["Value"]:
                result = Operations().getOptionsDict("Shares/%s" % dataLevel)
                if not result["OK"]:
                    self.log.error("Cannot get SEs:", result["Message"])
                    continue
                rseDict = result["Value"]
                for rse in rses:
                    try:
                        self.log.info("Setting", "%sShare for %s : %s" % (dataLevel, rse, rseDict.get(rse, 0)))
                        client.add_rse_attribute(rse, "%sShare" % dataLevel, rseDict.get(rse, 0))
                    except Exception:
                        self.log.error("Cannot create share:", "%sShare for %s" % (dataLevel, rse))
        else:
            self.log.error("Cannot get shares:", result["Message"])

        # Create the RSE attribute PrimaryDataSE and OccupancyLFN
        result = getStorageElements(vo)
        if result["OK"]:
            allSEs = result["Value"]
            primarySEs = resolveSEGroup("PrimarySEs", allSEs)
            self.log.info("Will set primarySEs flag to:", str(primarySEs))
            for rse in rses:
                if rse in allSEs:
                    storage = StorageElement(rse)
                    if not storage.valid:
                        self.log.warn("Storage element is not valid. Skipped RSE:", rse)
                        continue
                    occupancyLFN = storage.options.get("OccupancyLFN")
                    try:
                        client.add_rse_attribute(rse, "OccupancyLFN", occupancyLFN)
                    except Exception as err:
                        self.log.error(
                            "Cannot create RSE attribute OccupancyLFN for", "[RSE: %s, Error: %s]" % (rse, str(err))
                        )
                if rse in primarySEs:
                    try:
                        client.add_rse_attribute(rse, "PrimaryDataSE", True)
                    except Exception as err:
                        self.log.error(
                            "Cannot create RSE attribute PrimaryDataSE for",
                            "[RSE: %s, Error: %s]" % (rse, str(err)),
                        )
                else:
                    try:
                        client.delete_rse_attribute(rse, "PrimaryDataSE")
                    except RSEAttributeNotFound:
                        # Nothing to remove: the RSE was not flagged as primary
                        pass
                    except Exception as err:
                        self.log.error(
                            "Cannot remove RSE attribute PrimaryDataSE for",
                            "[RSE: %s, Error: %s]" % (rse, str(err)),
                        )
        self.log.info("RSEs synchronized for VO: ", vo)

        # Collect the user accounts from Dirac Configuration and create user accounts in Rucio
        self.log.info("Synchronizing accounts for VO", vo)
        listAccounts = [str(acc["account"]) for acc in client.list_accounts()]
        listScopes = [str(scope) for scope in client.list_scopes()]
        dnMapping = {}
        diracUsers = getUsersInVO(vo)
        self.log.debug(" Will consider following Dirac users for", "[VO: %s, Dirac users: %s]" % (vo, diracUsers))

        for account in diracUsers:
            dn = getUserOption(account, "DN")
            email = getUserOption(account, "Email")
            dnMapping[dn] = email
            if account not in listAccounts:
                self.log.info("Will create account with associated DN ", "[account: %s, DN: %s]" % (account, dn))
                try:
                    client.add_account(account, "USER", email)
                    listAccounts.append(account)
                except Exception as err:
                    self.log.error("Cannot create account", "[account: %s, Error: %s]" % (account, str(err)))
                try:
                    client.add_identity(account=account, identity=dn, authtype="X509", email=email, default=True)
                except Exception as err:
                    self.log.error(
                        "Cannot add identity for account",
                        "[Identity: dn=%s, account:%s, Error: %s]" % (dn, account, str(err)),
                    )
                    self.log.error(
                        " Account/identity skipped (it will not be created in Rucio)", "[%s/%s]" % (account, dn)
                    )
                    continue
                for rse in rses:
                    client.set_local_account_limit(account, rse, 1000000000000000)
            else:
                try:
                    client.add_identity(account=account, identity=dn, authtype="X509", email=email, default=True)
                except Duplicate:
                    pass
                except Exception as err:
                    self.log.error(
                        "Cannot create identity for account",
                        "[DN: %s, account: %s, Error: %s]" % (dn, account, str(err)),
                    )
            scope = "user." + account
            if scope not in listScopes:
                try:
                    self.log.info("Will create a scope", "[Scope: %s]" % scope)
                    client.add_scope(account, scope)
                    self.log.info("Scope successfully added", "[Scope: %s]" % scope)
                except Exception as err:
                    self.log.error("Cannot create a scope", "[Scope: %s, Error: %s]" % (scope, str(err)))

        # Collect the group accounts from Dirac Configuration and create service accounts in Rucio
        result = getGroupsForVO(vo)
        if result["OK"]:
            groups = result["Value"]
            self.log.debug(" Will consider following Dirac groups for", "[%s VO: %s]" % (vo, groups))
        else:
            groups = []
            self.log.debug("No Dirac groups for", "%s VO " % vo)
            self.log.debug("No Rucio service accounts will be created")
        for group in groups:
            if group not in listAccounts:
                self.log.info("Will create SERVICE account for Dirac group:", str(group))
                try:
                    client.add_account(group, "SERVICE", None)
                    listAccounts.append(group)
                except Exception as err:
                    self.log.error("Cannot create SERVICE account for", "[group: %s, Error: %s]" % (group, str(err)))
                # The limit belongs to the SERVICE account just handled, not to
                # the last user account of the previous loop.
                for rse in rses:
                    client.set_local_account_limit(group, rse, 1000000000000000)

            for dn in getDNsInGroup(group):
                try:
                    client.add_identity(
                        account=group, identity=dn, authtype="X509", email=dnMapping.get(dn, default_email)
                    )
                except Duplicate:
                    pass
                except Exception as err:
                    self.log.error(
                        "Cannot create identity for account",
                        "[identity %s, account %s, Error: %s]" % (dn, group, str(err)),
                    )
                    self.log.error(format_exc())

        # Collect the host accounts from Dirac Configuration and attach their identities
        # to the "dirac_srv" account in Rucio
        result = getHosts()
        if not result["OK"]:
            self.log.error("Cannot get host accounts:", "%s" % result["Message"])
        else:
            hosts = result["Value"]
            for host in hosts:
                dn = getHostOption(host, "DN")
                email = dnMapping.get(dn, default_email)
                try:
                    client.add_identity(account="dirac_srv", identity=dn, authtype="X509", email=email)
                except Duplicate:
                    pass
                except Exception as err:
                    self.log.error(
                        "Cannot create identity for account dirac_srv:", "[DN: %s, Error: %s]" % (dn, str(err))
                    )
                    self.log.error(format_exc())

        return S_OK()
    except Exception as exc:
        self.log.exception("Synchronisation for VO failed. VO skipped ", "VO=%s" % vo, lException=exc)
        return S_ERROR(str(format_exc()))
mySetup = gConfig.getValue('DIRAC/Setup')

monitoringClient = ComponentMonitoringClient()

# Retrieve information from all the hosts
client = SystemAdministratorIntegrator(exclude=excludedHosts)
resultAll = client.getOverallStatus()
# The overall result must be checked before its 'Value' is read: a failed
# call would otherwise surface as a KeyError instead of the error/exit below.
if not resultAll['OK']:
    gLogger.error(resultAll['Message'])
    DIRACexit(-1)

notificationClient = NotificationClient()
for host in resultAll['Value']:
    if not resultAll['Value'][host]['OK']:
        # If the host cannot be contacted, exclude it and send message
        excludedHosts.append(host)

        result = notificationClient.sendMail(
            Operations().getValue('EMail/Production', []),
            'Unreachable host',
            '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n'
            % host)
        if not result['OK']:
            gLogger.error('Can not send unreachable host notification mail: %s' % result['Message'])

resultHosts = client.getHostInfo()
if not resultHosts['OK']:
    gLogger.error(resultHosts['Message'])
    DIRACexit(-1)
resultInfo = client.getInfo()
class Limiter(object):
    """
    Compute "negative" conditions (attribute values that must not be matched)
    for sites, from the running limits and matching delays found under the
    JobScheduling section of the Operations helper.
    """

    def __init__(self, jobDB=None, opsHelper=None):
        """ Constructor

        :param jobDB: job database object; a new JobDB() is created when not given
        :param opsHelper: Operations helper; a new Operations() is created when not given
        """
        self.__runningLimitSection = "JobScheduling/RunningLimit"
        self.__matchingDelaySection = "JobScheduling/MatchingDelay"
        self.csDictCache = DictCache()
        self.condCache = DictCache()
        # Per-site DictCache of (attribute name, attribute value) delay entries
        self.delayMem = {}

        self.jobDB = jobDB if jobDB else JobDB()

        self.log = gLogger.getSubLogger("Limiter")

        self.__opsHelper = opsHelper if opsHelper else Operations()

    def getNegativeCond(self):
        """ Get negative condition for ALL sites

        :return: list of per-site condition dicts, each carrying a 'Site' key
        """
        cachedCond = self.condCache.get("GLOBAL")
        if cachedCond:
            return cachedCond

        condBySite = {}

        # Run Limit
        sectionsRes = self.__opsHelper.getSections(self.__runningLimitSection)
        runningSites = sectionsRes['Value'] if sectionsRes['OK'] else []
        for site in runningSites:
            condRes = self.__getRunningCondition(site)
            if condRes['OK'] and condRes['Value']:
                condBySite[site] = condRes['Value']

        # Delay limit
        sectionsRes = self.__opsHelper.getSections(self.__matchingDelaySection)
        delaySites = sectionsRes['Value'] if sectionsRes['OK'] else []
        for site in delaySites:
            condRes = self.__getDelayCondition(site)
            if not condRes['OK']:
                continue
            delayCond = condRes['Value']
            if not delayCond:
                continue
            if site in condBySite:
                condBySite[site] = self.__mergeCond(condBySite[site], delayCond)
            else:
                condBySite[site] = delayCond

        # Tag every condition with its site and flatten to a list
        orCond = []
        for site in condBySite:
            siteCond = condBySite[site]
            siteCond['Site'] = site
            orCond.append(siteCond)
        self.condCache.add("GLOBAL", 10, orCond)
        return orCond

    def getNegativeCondForSite(self, siteName):
        """ Generate a negative query based on the limits set on the site

        :param str siteName: name of the site
        :return: dict of attribute name -> list of attribute values to exclude
        """
        # Check if Limits are imposed onto the site
        negativeCond = {}
        checkLimits = self.__opsHelper.getValue("JobScheduling/CheckJobLimits", True)
        if checkLimits:
            runningRes = self.__getRunningCondition(siteName)
            if runningRes['OK']:
                negativeCond = runningRes['Value']
            self.log.verbose('Negative conditions for site %s after checking limits are: %s' %
                             (siteName, str(negativeCond)))

        checkDelay = self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True)
        if checkDelay:
            delayRes = self.__getDelayCondition(siteName)
            if delayRes['OK']:
                delayCond = delayRes['Value']
                self.log.verbose('Negative conditions for site %s after delay checking are: %s' %
                                 (siteName, str(delayCond)))
                negativeCond = self.__mergeCond(negativeCond, delayCond)

        if negativeCond:
            self.log.info('Negative conditions for site %s are: %s' % (siteName, str(negativeCond)))

        return negativeCond

    def __mergeCond(self, negCond, addCond):
        """ Merge two negative dicts

        The values of addCond are appended, without duplicates, to the lists
        of negCond, which is modified in place and returned.
        """
        for attr, extraValues in addCond.items():
            knownValues = negCond.setdefault(attr, [])
            for value in extraValues:
                if value not in knownValues:
                    knownValues.append(value)
        return negCond

    def __extractCSData(self, section):
        """ Extract limiting information from the CS in the form:
            { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
        """
        cachedDict = self.csDictCache.get(section)
        if cachedDict:
            return S_OK(cachedDict)

        sectionsRes = self.__opsHelper.getSections(section)
        if not sectionsRes['OK']:
            return sectionsRes

        limitsByAttr = {}
        for attName in sectionsRes['Value']:
            optionsRes = self.__opsHelper.getOptionsDict("%s/%s" % (section, attName))
            if not optionsRes['OK']:
                return optionsRes
            rawLimits = optionsRes['Value']
            try:
                numericLimits = {key: int(rawLimits[key]) for key in rawLimits}
            except Exception as excp:
                errMsg = "%s/%s has to contain numbers: %s" % (section, attName, str(excp))
                self.log.error(errMsg)
                return S_ERROR(errMsg)
            limitsByAttr[attName] = numericLimits

        self.csDictCache.add(section, 300, limitsByAttr)
        return S_OK(limitsByAttr)

    def __getRunningCondition(self, siteName):
        """ Get extra conditions allowing site throttling
        """
        siteSection = "%s/%s" % (self.__runningLimitSection, siteName)
        csRes = self.__extractCSData(siteSection)
        if not csRes['OK']:
            return csRes
        # limitsDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
        limitsDict = csRes['Value']
        if not limitsDict:
            return S_OK({})

        # Check if the site exceeding the given limits
        negCond = {}
        for attName in limitsDict:
            if attName not in self.jobDB.jobAttributeNames:
                self.log.error("Attribute %s does not exist. Check the job limits" % attName)
                continue
            cacheKey = "Running:%s:%s" % (siteName, attName)
            counters = self.condCache.get(cacheKey)
            if not counters:
                countRes = self.jobDB.getCounters(
                    'Jobs', [attName], {'Site': siteName, 'Status': ['Running', 'Matched', 'Stalled']})
                if not countRes['OK']:
                    return countRes
                counters = {entry[0][attName]: entry[1] for entry in countRes['Value']}
                self.condCache.add(cacheKey, 10, counters)
            attLimits = limitsDict[attName]
            for attValue in attLimits:
                limit = attLimits[attValue]
                running = counters.get(attValue, 0)
                if running < limit:
                    continue
                self.log.verbose('Job Limit imposed at %s on %s/%s=%d,'
                                 ' %d jobs already deployed' % (siteName, attName, attValue, limit, running))
                negCond.setdefault(attName, []).append(attValue)
        # negCond is something like : {'JobType': ['Merge']}
        return S_OK(negCond)

    def updateDelayCounters(self, siteName, jid):
        """ Register the matching delays configured for the site against the
            attribute values of the given job
        """
        # Get the info from the CS
        siteSection = "%s/%s" % (self.__matchingDelaySection, siteName)
        csRes = self.__extractCSData(siteSection)
        if not csRes['OK']:
            return csRes
        # delayDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
        delayDict = csRes['Value']
        if not delayDict:
            return S_OK()

        knownAttributes = []
        for attName in delayDict:
            if attName in self.jobDB.jobAttributeNames:
                knownAttributes.append(attName)
            else:
                self.log.error("Attribute %s does not exist in the JobDB. Please fix it!" % attName)
        attsRes = self.jobDB.getJobAttributes(jid, knownAttributes)
        if not attsRes['OK']:
            self.log.error("While retrieving attributes coming from %s: %s" % (siteSection, attsRes['Message']))
            return attsRes
        jobAtts = attsRes['Value']

        # Create the DictCache if not there
        if siteName not in self.delayMem:
            self.delayMem[siteName] = DictCache()
        # Update the counters
        siteDelays = self.delayMem[siteName]
        for attName in jobAtts:
            attValue = jobAtts[attName]
            if attValue in delayDict[attName]:
                delayTime = delayDict[attName][attValue]
                self.log.notice("Adding delay for %s/%s=%s of %s secs" % (siteName, attName, attValue, delayTime))
                siteDelays.add((attName, attValue), delayTime)
        return S_OK()

    def __getDelayCondition(self, siteName):
        """ Get extra conditions allowing matching delay
        """
        if siteName not in self.delayMem:
            return S_OK({})
        negCond = {}
        for attName, attValue in self.delayMem[siteName].getKeys():
            negCond.setdefault(attName, []).append(attValue)
        return S_OK(negCond)
def _getTransformationFiles(self, transDict, clients, statusList=None, replicateOrRemove=False):
    """
    Get the files of a transformation that are candidates for processing.

    :param dict transDict: transformation description; 'TransformationID',
        'Type' and 'Status' are read, 'Plugin' defaults to 'Standard'
    :param dict clients: must provide the 'TransformationClient' entry
    :param list statusList: file statuses to select, ['Unused', 'ProbInFC'] when
        empty or None; the list passed by the caller is never modified
    :param bool replicateOrRemove: when True the "nothing new happened" check is skipped
    :return: S_OK(list of file dicts), S_OK() when there is nothing to do,
             or the failed result of the file query
    """
    transID = transDict['TransformationID']
    plugin = transDict.get('Plugin', 'Standard')
    # Check if files should be sorted and limited in number
    operations = Operations()
    sortedBy = operations.getValue('TransformationPlugins/%s/SortedBy' % plugin, None)
    maxFiles = operations.getValue('TransformationPlugins/%s/MaxFiles' % plugin, 0)
    # No delay is applied when a plugin timeout is recorded for this transformation
    if self.pluginTimeout.get(transID, False):
        noUnusedDelay = 0
    else:
        noUnusedDelay = operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin,
                                            self.noUnusedDelay)
    method = '_getTransformationFiles'
    lastOffset = self.lastFileOffset.setdefault(transID, 0)

    # Files that were problematic (either explicit or because SE was banned) may be recovered,
    # and always removing the missing ones.
    # Work on a copy: an in-place `+=` would extend the caller's list, so a
    # reused list would grow by one 'MissingInFC' on every call.
    statusList = list(statusList) if statusList else ['Unused', 'ProbInFC']
    if transDict['Type'] == 'Removal' and 'MissingInFC' not in statusList:
        statusList.append('MissingInFC')

    transClient = clients['TransformationClient']
    res = transClient.getTransformationFiles(condDict={'TransformationID': transID,
                                                       'Status': statusList},
                                             orderAttribute=sortedBy,
                                             offset=lastOffset, maxfiles=maxFiles)
    if not res['OK']:
        self._logError("Failed to obtain input data:", res['Message'],
                       method=method, transID=transID)
        return res
    transFiles = res['Value']
    if maxFiles and len(transFiles) == maxFiles:
        # A full page was returned: continue from the next page at the next call
        self.lastFileOffset[transID] += maxFiles
    else:
        del self.lastFileOffset[transID]

    if not transFiles:
        self._logInfo("No '%s' files found for transformation." % ','.join(statusList),
                      method=method, transID=transID)
        if transDict['Status'] == 'Flush':
            res = transClient.setTransformationParameter(transID, 'Status', 'Active')
            if not res['OK']:
                self._logError("Failed to update transformation status to 'Active':", res['Message'],
                               method=method, transID=transID)
            else:
                self._logInfo("Updated transformation status to 'Active'.",
                              method=method, transID=transID)
        return S_OK()

    # Check if transformation is kicked
    kickFile = os.path.join(self.controlDirectory, 'KickTransformation_%s' % str(transID))
    try:
        kickTrans = os.path.exists(kickFile)
        if kickTrans:
            os.remove(kickFile)
    except OSError:
        # Best effort: failing to remove the kick file must not stop the processing
        pass

    # Check if something new happened
    now = datetime.datetime.utcnow()
    if not kickTrans and not replicateOrRemove and noUnusedDelay:
        nextStamp = self.unusedTimeStamp.setdefault(transID, now) + datetime.timedelta(hours=noUnusedDelay)
        skip = now < nextStamp
        if len(transFiles) == self.unusedFiles.get(transID, 0) and transDict['Status'] != 'Flush' and skip:
            self._logInfo("No new '%s' files found for transformation." % ','.join(statusList),
                          method=method, transID=transID)
            return S_OK()

    self.unusedTimeStamp[transID] = now
    # If files are not Unused, set them Unused
    notUnused = [trFile['LFN'] for trFile in transFiles if trFile['Status'] != 'Unused']
    otherStatuses = sorted({trFile['Status'] for trFile in transFiles} - {'Unused'})
    if notUnused:
        res = transClient.setFileStatusForTransformation(transID, 'Unused', notUnused, force=True)
        if not res['OK']:
            self._logError("Error setting %d files Unused:" % len(notUnused), res['Message'],
                           method=method, transID=transID)
        else:
            self._logInfo("Set %d files from %s to Unused" % (len(notUnused), ','.join(otherStatuses)))
            self.__removeFilesFromCache(transID, notUnused)
    return S_OK(transFiles)
def optimizeJob(self, jid, jobState):
    """Decide where a job can run and whether its input data must be staged.

    1. Banned sites are removed from the destination list.
    2. Get input files
    3. Production jobs are sent directly to TQ
    4. Check if staging is necessary

    :param jid: job identifier (not used in this method body)
    :param jobState: job state object, queried for the job attributes, manifest and input data

    :return: S_OK/S_ERROR structure; whatever the hold / send-to-TQ / set-site helpers
             return is passed back unchanged
    """
    # Reschedule delay
    result = jobState.getAttributes(['RescheduleCounter', 'RescheduleTime', 'ApplicationStatus'])
    if not result['OK']:
        return result
    attDict = result['Value']
    try:
        reschedules = int(attDict['RescheduleCounter'])
    except (ValueError, KeyError, TypeError):
        # TypeError covers a counter that is present but None
        return S_ERROR("RescheduleCounter has to be an integer")
    if reschedules != 0:
        delays = self.ex_getOption('RescheduleDelays', [60, 180, 300, 600])
        # Beyond the configured number of delays, the last one is reused
        delay = delays[min(reschedules, len(delays) - 1)]
        waited = toEpoch() - toEpoch(fromString(attDict['RescheduleTime']))
        if waited < delay:
            return self.__holdJob(jobState, 'On Hold: after rescheduling %s' % reschedules, delay)

    # Get the job manifest for the later checks
    result = jobState.getManifest()
    if not result['OK']:
        return S_ERROR("Could not retrieve job manifest: %s" % result['Message'])
    jobManifest = result['Value']

    # Get site requirements
    result = self.__getSitesRequired(jobManifest)
    if not result['OK']:
        return result
    userSites, userBannedSites = result['Value']

    # Get job type
    result = jobState.getAttribute("JobType")
    if not result['OK']:
        return S_ERROR("Could not retrieve job type")
    jobType = result['Value']

    # Get banned sites from DIRAC
    result = self.siteClient.getSites('Banned')
    if not result['OK']:
        return S_ERROR("Cannot retrieve banned sites from JobDB")
    wmsBannedSites = result['Value']

    # If the user has selected any site, filter them and hold the job if not able to run
    if userSites:
        if jobType not in self.ex_getOption('ExcludedOnHoldJobTypes', []):
            result = self.siteClient.getUsableSites(userSites)
            if not result['OK']:
                return S_ERROR("Problem checking userSites for tuple of active/banned/invalid sites")
            usableSites = set(result['Value'])
            bannedSites = []
            invalidSites = []
            for site in userSites:
                if site in wmsBannedSites:
                    bannedSites.append(site)
                elif site not in usableSites:
                    invalidSites.append(site)

            if invalidSites:
                self.jobLog.debug("Invalid site(s) requested: %s" % ','.join(invalidSites))
                if not self.ex_getOption('AllowInvalidSites', True):
                    return self.__holdJob(jobState, "Requested site(s) %s are invalid" % ",".join(invalidSites))
            if bannedSites:
                self.jobLog.debug("Banned site(s) %s ignored" % ",".join(bannedSites))
                if not usableSites:
                    return self.__holdJob(jobState, "Requested site(s) %s are inactive" % ",".join(bannedSites))

            if not usableSites:
                return self.__holdJob(jobState, "No requested site(s) are active/valid")
            userSites = list(usableSites)

    checkPlatform = self.ex_getOption('CheckPlatform', False)
    jobPlatform = jobManifest.getOption("Platform", None)
    # First check that the platform is valid (in OSCompatibility list)
    if checkPlatform and jobPlatform:
        result = gConfig.getOptionsDict('/Resources/Computing/OSCompatibility')
        if not result['OK']:
            return S_ERROR("Unable to get OSCompatibility list")
        allPlatforms = result['Value']
        if jobPlatform not in allPlatforms:
            self.jobLog.error("Platform %s is not supported" % jobPlatform)
            return S_ERROR("Platform %s is not supported" % jobPlatform)

    # Filter the userSites by the platform selection (if there is one)
    if checkPlatform and userSites:
        if jobPlatform:
            result = self.__filterByPlatform(jobPlatform, userSites)
            if not result['OK']:
                self.jobLog.error("Failed to filter job sites by platform: %s" % result['Message'])
                return S_ERROR("Failed to filter job sites by platform")
            userSites = result['Value']
            if not userSites:
                # No sites left after filtering -> Invalid platform/sites combination
                self.jobLog.error("No selected sites match platform '%s'" % jobPlatform)
                return S_ERROR("No selected sites match platform '%s'" % jobPlatform)

    # Check if there is input data
    result = jobState.getInputData()
    if not result['OK']:
        self.jobLog.error("Cannot get input data %s" % (result['Message']))
        return S_ERROR("Failed to get input data from JobDB")

    if not result['Value']:
        # No input data? Just send to TQ
        return self.__sendToTQ(jobState, jobManifest, userSites, userBannedSites)

    self.jobLog.verbose("Has an input data requirement")
    inputData = result['Value']

    # ===================================================================================
    # Production jobs are sent to TQ, but first we have to verify if staging is necessary
    # ===================================================================================
    if jobType in Operations().getValue('Transformations/DataProcessing', []):
        self.jobLog.info("Production job: sending to TQ, but first checking if staging is requested")

        res = getFilesToStage(inputData,
                              jobState=jobState,
                              checkOnlyTapeSEs=self.ex_getOption('CheckOnlyTapeSEs', True),
                              jobLog=self.jobLog)

        if not res['OK']:
            return self.__holdJob(jobState, res['Message'])
        if res['Value']['absentLFNs']:
            # Some files do not exist at all... set the job Failed
            # Reverse errors: group the LFNs by failure reason
            reasons = {}
            for lfn, reason in res['Value']['absentLFNs'].items():
                reasons.setdefault(reason, []).append(lfn)
            for reason, lfns in reasons.items():
                # Some files are missing in the FC or in SEs, fail the job
                self.jobLog.error(reason, ','.join(lfns))
            error = ','.join(reasons)
            return S_ERROR(error)

        if res['Value']['failedLFNs']:
            return self.__holdJob(jobState, "Couldn't get storage metadata of some files")
        stageLFNs = res['Value']['offlineLFNs']
        if stageLFNs:
            res = self.__checkStageAllowed(jobState)
            if not res['OK']:
                return res
            if not res['Value']:
                return S_ERROR("Stage not allowed")
            # NOTE(review): the result of the staging request is not checked here,
            # unlike in the user-job branch below - confirm this is intended
            self.__requestStaging(jobState, stageLFNs)
            return S_OK()
        else:
            # No staging required
            onlineSites = res['Value']['onlineSites']
            if onlineSites:
                # Set the online site(s) first
                userSites = set(userSites)
                onlineSites &= userSites
                userSites = list(onlineSites) + list(userSites - onlineSites)
            return self.__sendToTQ(jobState, jobManifest, userSites, userBannedSites, onlineSites=onlineSites)

    # ===================================================
    # From now on we know it's a user job with input data
    # ===================================================

    idAgent = self.ex_getOption('InputDataAgent', 'InputData')
    result = self.retrieveOptimizerParam(idAgent)
    if not result['OK']:
        self.jobLog.error("Could not retrieve input data info", result['Message'])
        return S_ERROR("Could not retrieve input data info")
    opData = result['Value']

    if 'SiteCandidates' not in opData:
        return S_ERROR("No possible site candidates")

    # Filter input data sites with user requirement
    siteCandidates = list(opData['SiteCandidates'])
    self.jobLog.info("Site candidates are %s" % siteCandidates)

    if userSites:
        siteCandidates = list(set(siteCandidates) & set(userSites))

    siteCandidates = self._applySiteFilter(siteCandidates, banned=userBannedSites)
    if not siteCandidates:
        return S_ERROR("Impossible InputData * Site requirements")

    idSites = {}
    for site in siteCandidates:
        idSites[site] = opData['SiteCandidates'][site]

    # Check if sites have correct count of disk+tape replicas
    numData = len(inputData)
    errorSites = set()
    for site in idSites:
        if numData != idSites[site]['disk'] + idSites[site]['tape']:
            self.jobLog.error("Site candidate %s does not have all the input data" % site)
            errorSites.add(site)
    for site in errorSites:
        idSites.pop(site)
    if not idSites:
        return S_ERROR("Site candidates do not have all the input data")

    # Check if staging is required
    stageRequired, siteCandidates = self.__resolveStaging(inputData, idSites)
    if not siteCandidates:
        return S_ERROR("No destination sites available")

    # Is any site active?
    stageSites = self._applySiteFilter(siteCandidates, banned=wmsBannedSites)
    if not stageSites:
        return self.__holdJob(jobState, "Sites %s are inactive or banned" % ", ".join(siteCandidates))

    # If no staging is required send to TQ
    if not stageRequired:
        # Use siteCandidates and not stageSites because active and banned sites
        # will be taken into account on matching time
        return self.__sendToTQ(jobState, jobManifest, siteCandidates, userBannedSites)

    # Check if the user is allowed to stage
    if self.ex_getOption("RestrictDataStage", False):
        res = self.__checkStageAllowed(jobState)
        if not res['OK']:
            return res
        if not res['Value']:
            return S_ERROR("Stage not allowed")

    # Get stageSites[0] because it has already been randomized and it's as good as any in stageSites
    stageSite = stageSites[0]
    self.jobLog.verbose(" Staging site will be %s" % (stageSite))
    stageData = idSites[stageSite]
    # Set as if everything has already been staged
    stageData['disk'] += stageData['tape']
    stageData['tape'] = 0
    # Set the site info back to the original dict to save afterwards
    opData['SiteCandidates'][stageSite] = stageData

    stageRequest = self.__preRequestStaging(jobManifest, stageSite, opData)
    if not stageRequest['OK']:
        return stageRequest
    stageLFNs = stageRequest['Value']
    result = self.__requestStaging(jobState, stageLFNs)
    if not result['OK']:
        return result
    stageLFNs = result['Value']
    self.__updateSharedSESites(jobManifest, stageSite, stageLFNs, opData)
    # Save the optimizer data again
    self.jobLog.verbose('Updating %s Optimizer Info:' % (idAgent), opData)
    result = self.storeOptimizerParam(idAgent, opData)
    if not result['OK']:
        return result

    return self.__setJobSite(jobState, stageSites)
class FCConditionParser(object):
    """Decide, lfn by lfn, whether an operation should be run on a given catalog.

    A condition is a boolean expression whose elementary term has the form
    "pluginName=whateverThatWillBePassedToThePlugin". Each elementary term is
    evaluated by the corresponding plugin, and the results are combined with
    the following operators::

      * `!` for not
      * `&` for and
      * `|` for or
      * `[ ]` for prioritizing the operations

    These characters, as well as '=', are reserved by the grammar and cannot
    appear in the text handed over to a plugin.

    The condition is either given by the caller or looked up in the CS
    (see __call__ and __getConditionFromCS).

    Examples of rules::

      * Filename=startswith('/lhcb') & Proxy=voms.has(/lhcb/Role->production)
      * [Filename=startswith('/lhcb') & !Filename=find('/user/')] | Proxy=group.in(lhcb_mc, lhcb_data)
    """

    # Characters reserved for the grammar itself
    __forbidenChars = ("[", "]", "!", "&", "|", "=")
    # Everything printable but the reserved characters, plus the space
    __allowedChars = "".join(set(printables) - set(__forbidenChars)) + " "

    # Elementary term of a rule: pluginName=whateverThatWillBePassedToThePlugin
    __pluginOperand = Word(__allowedChars) + Literal("=") + Word(__allowedChars)

    # The classes below are instantiated at parse time,
    # one per matching expression

    class _BoolBinOp(object):
        """Common base of the binary boolean operators"""

        # Symbol used when printing the operation
        reprsymbol = None

        # Boolean reduction applied to the evaluated arguments.
        # Placeholder here, overridden by the concrete operators.
        @staticmethod
        def evalop(_x):
            return None

        def __init__(self, token):
            """
            :param token: the token matching a binary operator:
                          a one element list, containing a list
                          [ [ Arg1, Operator, Arg2] ]
                          Each argument must offer an "eval" method
                          accepting `kwargs` and returning a boolean
            """
            # Every second element is an argument, the others are the operator
            self.args = token[0][0::2]

        def __str__(self):
            """Printable form: the arguments joined by the operator symbol"""
            separator = " %s " % self.reprsymbol
            return "(%s)" % separator.join(str(operand) for operand in self.args)

        def eval(self, **kwargs):
            """Evaluate the arguments and combine them with the operator

            :param kwargs: whatever information is given to plugin (typically lfn)
            """
            # The generator keeps the evaluation lazy (any/all short-circuit)
            return self.evalop(arg.eval(**kwargs) for arg in self.args)

        __repr__ = __str__

    class _BoolAnd(_BoolBinOp):
        """The 'and' operator"""

        reprsymbol = "&"
        evalop = all

    class _BoolOr(_BoolBinOp):
        """The 'or' operator"""

        reprsymbol = "|"
        evalop = any

    class _BoolNot(object):
        """The 'not' unary operator"""

        def __init__(self, t):
            """
            :param t: the token matching a unary operator:
                      a one element list, containing a list
                      [ [ !, Arg1] ]
                      The argument must offer an "eval" method
                      accepting `kwargs` and returning a boolean
            """
            # Only the argument is of interest
            self.arg = t[0][1]

        def eval(self, **kwargs):
            """Return the negation of the evaluation of the argument

            :param kwargs: whatever information is given to plugin (typically lfn)
            """
            return not self.arg.eval(**kwargs)

        def __str__(self):
            return "!%s" % (self.arg,)

        __repr__ = __str__

    # Full grammar: elementary terms combined with the boolean operators,
    # square brackets being used for grouping
    __boolExpr = infixNotation(
        __pluginOperand,
        [
            ("!", 1, opAssoc.RIGHT, _BoolNot),
            ("&", 2, opAssoc.LEFT, _BoolAnd),
            ("|", 2, opAssoc.LEFT, _BoolOr),
        ],
        lpar=Suppress("["),
        rpar=Suppress("]"),
    )

    class PluginOperand(object):
        """Wrapper around a plugin and its condition.

        Used as parse action for every "plugin=condition" term
        """

        def __init__(self, tokens):
            """
            :param tokens: [ pluginName, =, conditions ]
                           'Plugin' is appended to pluginName to obtain
                           the name of the plugin to load
            """
            self.pluginName = "%sPlugin" % tokens[0].strip(" ")
            self.conditions = tokens[2].strip(" ")

            # Load the plugin class and instantiate it with the condition
            loadResult = ObjectLoader().loadObject(
                "Resources.Catalog.ConditionPlugins.%s" % self.pluginName)
            if not loadResult["OK"]:
                raise Exception(loadResult["Message"])

            pluginClass = loadResult["Value"]
            self._pluginInst = pluginClass(self.conditions)

        def eval(self, **kwargs):
            """Delegate the evaluation to the plugin instance

            :param kwargs: information forwarded to the plugin (e.g. the lfn)
            :return: whatever the plugin returns
            """
            return self._pluginInst.eval(**kwargs)

        def __str__(self):
            return self.pluginName

        __repr__ = __str__

    def __init__(self, vo=None, ro_methods=None):
        """
        :param vo: name of the VO, given to the Operations helper
        :param ro_methods: names of the read-only methods (defaults to an empty list);
                           used to choose between the READ and WRITE conditions
        """
        # Each text matching the __pluginOperand grammar is turned into a PluginOperand
        self.__pluginOperand.setParseAction(lambda tokens: self.PluginOperand(tokens))

        self.opHelper = Operations(vo=vo)
        self.ro_methods = ro_methods or []
        self.log = gLogger.getSubLogger("FCConditionParser")

    def __evaluateCondition(self, conditionString, **kwargs):
        """Parse a condition and evaluate it against the given attributes.

        CAUTION: lfns are here given one by one

        :param str conditionString: the condition to parse
        :param kwargs: attributes forwarded to the plugins (typically lfn)
        :return: result of the evaluation
        """
        self.log.debug("Testing %s against %s" % (conditionString, kwargs))

        # The first element of the parse result is either one of the
        # operators defined above, or a PluginOperand
        parsed = self.__boolExpr.parseString(conditionString)
        verdict = parsed[0].eval(**kwargs)

        self.log.debug("Evaluated to %s" % verdict)
        return verdict

    def __getConditionFromCS(self, catalogName, operationName):
        """Look up the condition to apply in the CS, through the Operations helper.

        The options are searched under "Services/Catalogs/<catalogName>/Conditions/",
        in this order: the operation name, then READ or WRITE (READ when the
        operation is listed in ro_methods), then ALL.
        The first non empty value is returned.

        :param str catalogName: the catalog we want to work on
        :param str operationName: the operation we want to perform

        :returns: a condition string, or None when nothing is defined
        """
        basePath = "Services/Catalogs/%s/Conditions/" % catalogName
        accessType = "READ" if operationName in self.ro_methods else "WRITE"
        optionNames = ("%s" % operationName, "%s" % accessType, "ALL")

        for optionName in optionNames:
            condVal = self.opHelper.getValue(basePath + optionName)
            if condVal:
                return condVal
        return None

    def __call__(self, catalogName, operationName, lfns, condition=None, **kwargs):
        """Evaluate a condition for a catalog, an operation and a list of lfns.

        When 'condition' is None, the condition is taken from the CS
        (see __getConditionFromCS). Without any condition, every lfn
        evaluates to True. An exception raised while evaluating an lfn is
        logged and the lfn evaluates to False.

        :param str catalogName: name of the catalog we want to work on
        :param str operationName: name of the operation we want to perform
        :param lfns: list/dict of lfns

                     .. warning:: lfns is iterated and len() is called on it,
                                  so it should not be a plain string

        :param condition: condition string. If None, it is fetched from the CS
        :param kwargs: extra params forwarded to the plugins

        :return: S_OK with a 'Successful' dict {lfn:True/False} holding the
                 evaluation for each lfn, and an empty 'Failed' dict
        """
        self.log.debug("Testing %s on %s for %s lfns" % (operationName, catalogName, len(lfns)))

        if condition is not None:
            conditionStr = condition
        else:
            conditionStr = self.__getConditionFromCS(catalogName, operationName)

        self.log.debug("Condition string: %s" % conditionStr)

        # No condition: everything is accepted
        if not conditionStr:
            return S_OK({"Successful": dict.fromkeys(lfns, True), "Failed": {}})

        evaluatedLfns = {}
        for lfn in lfns:
            try:
                verdict = self.__evaluateCondition(conditionStr, lfn=lfn, **kwargs)
            except Exception as e:
                self.log.exception("Exception while evaluation conditions", lException=e)
                verdict = False
            evaluatedLfns[lfn] = verdict

        return S_OK({"Successful": evaluatedLfns, "Failed": {}})
def doTheWhizardInstallation():
    """Do the installation of a new whizard version.

    Steps, in order:

    * check the SLC and gfortran versions, parse the command line
    * download the current process list
    * collect the process descriptions from the whizard result folder
      (``*.in`` templates, ``whizard.prc``, ``cross_sections_*`` files)
    * copy the whizard files and the libraries of the executable to a local
      ``whizard<version>`` folder, compute the md5 sums and create the tarball
    * register the tarball in the configuration system and upload it
    * update and upload the process list, commit the CS changes

    The script terminates through ``dexit``: 0 on success or when the version is
    already registered, non zero otherwise.
    """
    res = checkSLCVersion()
    if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)

    res = checkGFortranVersion()
    if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)

    cliParams = Params()
    cliParams.registerSwitches()
    Script.parseCommandLine(ignoreErrors=False)

    whizardResultFolder = cliParams.path
    platform = cliParams.platform
    whizard_version = cliParams.version
    appVersion = whizard_version
    beam_spectra_version = cliParams.beam_spectra

    if not whizardResultFolder or not whizard_version or not beam_spectra_version:
        Script.showHelp()
        dexit(2)

    # hashlib replaces the deprecated md5 module
    import hashlib

    from ILCDIRAC.Core.Utilities.ProcessList import ProcessList
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    from ILCDIRAC.Core.Utilities.FileUtils import upload
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    diracAdmin = DiracAdmin()

    modifiedCS = False

    softwareSection = "/Operations/Defaults/AvailableTarBalls"
    processlistLocation = "ProcessList/Location"

    appName = "whizard"

    ops = Operations()
    path_to_process_list = ops.getValue(processlistLocation, "")
    if not path_to_process_list:
        gLogger.error("Could not find process list location in CS")
        dexit(2)

    gLogger.verbose("Getting process list from file catalog")
    datMan = DataManager()
    res = datMan.getFile(path_to_process_list)
    if not res['OK']:
        gLogger.error("Error while getting process list from storage")
        dexit(2)
    gLogger.verbose("done")

    # just the name of the local file in current working directory
    processlist = os.path.basename(path_to_process_list)
    if not os.path.exists(processlist):
        gLogger.error("Process list does not exist locally")
        dexit(2)

    pl = ProcessList(processlist)

    startDir = os.getcwd()
    inputlist = {}
    os.chdir(whizardResultFolder)
    folderlist = os.listdir(whizardResultFolder)

    # The executable, the process and the model files are mandatory
    for requiredFile, errorTemplate in (("whizard", "whizard executable not found in %s, please check"),
                                        ("whizard.prc", "whizard.prc not found in %s, please check"),
                                        ("whizard.mdl", "whizard.mdl not found in %s, please check")):
        if requiredFile not in folderlist:
            gLogger.error(errorTemplate % whizardResultFolder)
            os.chdir(startDir)
            dexit(2)

    gLogger.verbose("Preparing process list")

    # FIXME:: What is this doing exactly? Is this necessary? -- APS, JFS
    for f in folderlist:
        if ".in" in f:
            found_detail = False
            with open(f, "r") as infile:
                for line in infile:
                    if line.count("decay_description"):
                        currprocess = f.split(".template.in")[0]
                        inputlist[currprocess] = {}
                        inputlist[currprocess]["InFile"] = f.rstrip("~")
                        inputlist[currprocess]["Detail"] = line.split("\"")[1]
                        found_detail = True
                    if line.count("process_id") and found_detail:
                        process_id = line.split("\"")[1]
                        inputlist[currprocess]["Model"] = ""
                        inputlist[currprocess]["Generator"] = ""
                        inputlist[currprocess]["Restrictions"] = ""
                        for process in process_id.split():
                            print("Looking for detail of process %s" % (process))
                            process_detail = getDetailsFromPRC("whizard.prc", process)
                            inputlist[currprocess]["Model"] = process_detail["Model"]
                            inputlist[currprocess]["Generator"] = process_detail["Generator"]
                            # Restrictions of all the processes are accumulated, comma separated
                            if len(inputlist[currprocess]["Restrictions"]):
                                inputlist[currprocess]["Restrictions"] = inputlist[currprocess]["Restrictions"] \
                                    + ", " + process_detail["Restrictions"]
                            else:
                                inputlist[currprocess]["Restrictions"] = process_detail["Restrictions"]
    # END FIXME

    # Update inputlist with what was found looking in the prc file
    processes = readPRCFile("whizard.prc")
    inputlist.update(processes)

    # get from cross section files the cross sections for the processes in inputlist
    # Need full process list
    for f in folderlist:
        if "cross_sections_" in f:
            with open(f, "r") as crossfile:
                for line in crossfile:
                    line = line.strip()
                    if not line:
                        continue
                    # Skip the comment lines
                    if line[0] == "#" or line[0] == "!":
                        continue
                    fields = line.split()
                    if len(fields) < 2:
                        continue
                    currprocess = fields[0]
                    if currprocess in inputlist:
                        inputlist[currprocess]['CrossSection'] = fields[1]

    gLogger.notice("Preparing Tarball")

    # Make a folder in the current directory of the user to store the whizard libraries, executable et al.
    localWhizardFolderRel = ("whizard" + whizard_version)  # relative path
    localWhizardFolder = os.path.join(startDir, localWhizardFolderRel)

    if not os.path.exists(localWhizardFolder):
        os.makedirs(localWhizardFolder)

    localWhizardLibFolder = os.path.join(localWhizardFolder, 'lib')
    if os.path.exists(localWhizardLibFolder):
        shutil.rmtree(localWhizardLibFolder)
    os.makedirs(localWhizardLibFolder)  # creates the lib folder

    whizardLibraries = getListOfLibraries(os.path.join(whizardResultFolder, "whizard"))
    copyLibsCall = ["rsync", "-avzL"] + list(whizardLibraries) + [localWhizardLibFolder]
    # Wait for rsync to finish (and drain its pipes): the libraries must be
    # in place before the md5 sums are computed and the tarball is created
    rsync = subprocess.Popen(copyLibsCall, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _rsyncOut, rsyncErr = rsync.communicate()
    if rsync.returncode != 0:
        gLogger.error("Copying the whizard libraries failed (rsync exit code %s): %s" % (rsync.returncode,
                                                                                        rsyncErr))

    # The current directory is still whizardResultFolder here
    for fileName in folderlist:
        shutil.copy(fileName, localWhizardFolder)

    # Get the list of md5 sums for all the files in the folder to be tarred
    os.chdir(localWhizardFolder)
    subprocess.call("find . -type f -exec md5sum {} > ../md5_checksum.md5 \\; && mv ../md5_checksum.md5 .",
                    shell=True)
    os.chdir(startDir)

    # Create the Tarball
    gLogger.notice("Creating Tarball...")
    appTar = localWhizardFolder + ".tgz"
    myappTar = tarfile.open(appTar, "w:gz")
    try:
        myappTar.add(localWhizardFolderRel)
    finally:
        myappTar.close()

    # The tarball is binary: read it as such, and close it afterwards
    with open(appTar, 'rb') as tarFile:
        md5sum = hashlib.md5(tarFile.read()).hexdigest()

    gLogger.notice("...Done")

    gLogger.notice("Registering new Tarball in CS")
    tarballurl = {}

    av_platforms = gConfig.getSections(softwareSection, [])
    if av_platforms['OK']:
        if platform not in av_platforms['Value']:
            gLogger.error("Platform %s unknown, available are %s." % (platform, ", ".join(av_platforms['Value'])))
            gLogger.error("If yours is missing add it in CS")
            dexit(255)
    else:
        gLogger.error("Could not find all platforms available in CS")
        dexit(255)

    av_apps = gConfig.getSections("%s/%s" % (softwareSection, platform), [])
    if not av_apps['OK']:
        gLogger.error("Could not find all applications available in CS")
        dexit(255)

    if appName.lower() in av_apps['Value']:
        versions = gConfig.getSections("%s/%s/%s" % (softwareSection, platform, appName.lower()), [])
        if not versions['OK']:
            gLogger.error("Could not find all versions available in CS")
            dexit(255)
        if appVersion in versions['Value']:
            gLogger.error('Application %s %s for %s already in CS, nothing to do' % (appName.lower(),
                                                                                    appVersion,
                                                                                    platform))
            dexit(0)
        else:
            result = diracAdmin.csSetOption("%s/%s/%s/%s/TarBall" % (softwareSection, platform,
                                                                     appName.lower(), appVersion),
                                            os.path.basename(appTar))
            if result['OK']:
                modifiedCS = True
                tarballurl = gConfig.getOption("%s/%s/%s/TarBallURL" % (softwareSection, platform,
                                                                        appName.lower()),
                                               "")
                if len(tarballurl['Value']) > 0:
                    res = upload(tarballurl['Value'], appTar)
                    if not res['OK']:
                        gLogger.error("Upload to %s failed" % tarballurl['Value'])
                        dexit(255)
            result = diracAdmin.csSetOption("%s/%s/%s/%s/Md5Sum" % (softwareSection, platform,
                                                                    appName.lower(), appVersion),
                                            md5sum)
            if result['OK']:
                modifiedCS = True
            result = diracAdmin.csSetOption("%s/%s/%s/%s/Dependencies/beam_spectra/version" % (softwareSection,
                                                                                               platform,
                                                                                               appName.lower(),
                                                                                               appVersion),
                                            beam_spectra_version)
    else:
        result = diracAdmin.csSetOption("%s/%s/%s/%s/TarBall" % (softwareSection, platform,
                                                                 appName.lower(), appVersion),
                                        os.path.basename(appTar))
        if result['OK']:
            modifiedCS = True
            tarballurl = gConfig.getOption("%s/%s/%s/TarBallURL" % (softwareSection, platform,
                                                                    appName.lower()),
                                           "")
            if len(tarballurl['Value']) > 0:
                res = upload(tarballurl['Value'], appTar)
                if not res['OK']:
                    gLogger.error("Upload to %s failed" % tarballurl['Value'])
                    dexit(255)

        result = diracAdmin.csSetOption("%s/%s/%s/%s/Md5Sum" % (softwareSection, platform,
                                                                appName.lower(), appVersion),
                                        md5sum)

        result = diracAdmin.csSetOption("%s/%s/%s/%s/Dependencies/beam_spectra/version" % (softwareSection,
                                                                                           platform,
                                                                                           appName.lower(),
                                                                                           appVersion),
                                        beam_spectra_version)

    gLogger.verbose("Done uploading the tar ball")

    os.remove(appTar)

    # Without a tarball URL the process list cannot be updated: stop with a
    # clear message rather than failing on the missing 'Value' key below
    if 'Value' not in tarballurl:
        gLogger.error("Could not register the tarball in CS, the process list is not updated")
        dexit(255)

    # Set for all new processes the TarBallURL
    for process in inputlist:
        inputlist[process]['TarBallCSPath'] = tarballurl['Value'] + os.path.basename(appTar)

    pl.updateProcessList(inputlist)

    pl.writeProcessList()

    raw_input("Do you want to upload the process list? Press ENTER to proceed or CTRL-C to abort!")

    pl.uploadProcessListToFileCatalog(path_to_process_list, appVersion)

    # Commit the changes if nothing has failed and the CS has been modified
    if modifiedCS:
        result = diracAdmin.csCommitChanges(False)
        gLogger.verbose(result)
    gLogger.notice('All done OK!')
    dexit(0)
class WorkflowTasks(TaskBase):
  """ Handles jobs: prepares job objects for transformation tasks, submits them
      through the submission client and follows their status through the job
      monitoring client.
  """

  def __init__(self, transClient=None, logger=None, submissionClient=None, jobMonitoringClient=None,
               outputDataModule=None, jobClass=None, opsH=None, destinationPlugin=None,
               ownerDN=None, ownerGroup=None):
    """ Generates some default objects.

        jobClass is by default "DIRAC.Interfaces.API.Job.Job". An extension of it also works:
        VOs can pass in their job class extension, if present.

        Every collaborator that is not passed in (or is passed as a false value) is replaced
        by a default one; the output data module and the destination plugin names are then
        read from the Operations helper.
    """
    if not logger:
      logger = gLogger.getSubLogger('WorkflowTasks')

    super(WorkflowTasks, self).__init__(transClient, logger)

    # Certificates (with delegation) are only used when both owner DN and group are given
    useCertificates = bool(ownerDN) and bool(ownerGroup)
    if not submissionClient:
      self.submissionClient = WMSClient(useCertificates=useCertificates,
                                        delegatedDN=ownerDN,
                                        delegatedGroup=ownerGroup)
    else:
      self.submissionClient = submissionClient

    if not jobMonitoringClient:
      self.jobMonitoringClient = JobMonitoringClient()
    else:
      self.jobMonitoringClient = jobMonitoringClient

    if not jobClass:
      self.jobClass = Job
    else:
      self.jobClass = jobClass

    if not opsH:
      self.opsH = Operations()
    else:
      self.opsH = opsH

    if not outputDataModule:
      self.outputDataModule = self.opsH.getValue("Transformations/OutputDataModule", "")
    else:
      self.outputDataModule = outputDataModule

    if not destinationPlugin:
      self.destinationPlugin = self.opsH.getValue('Transformations/DestinationPlugin', 'BySE')
    else:
      self.destinationPlugin = destinationPlugin

    # Plugin/module objects are created lazily, on first use, and then cached
    self.destinationPlugin_o = None
    self.outputDataModule_o = None

  def prepareTransformationTasks(self, transBody, taskDict, owner='', ownerGroup='', ownerDN='',
                                 bulkSubmissionFlag=False):
    """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB
        jobClass is by default "DIRAC.Interfaces.API.Job.Job". An extension of it also works.

    :param str transBody: transformation job template
    :param dict taskDict: dictionary of per task parameters
    :param str owner: owner of the transformation
    :param str ownerGroup: group of the owner of the transformation
    :param str ownerDN: DN of the owner of the transformation
    :param bool bulkSubmissionFlag: flag for using bulk submission or not

    :return: S_OK/S_ERROR with updated taskDict
    """
    # Fall back on the current proxy when the owner or its group are not given
    if (not owner) or (not ownerGroup):
      res = getProxyInfo(False, False)
      if not res['OK']:
        return res
      proxyInfo = res['Value']
      owner = proxyInfo['username']
      ownerGroup = proxyInfo['group']

    if not ownerDN:
      res = getDNForUsername(owner)
      if not res['OK']:
        return res
      # The first DN returned for the user is used
      ownerDN = res['Value'][0]

    if bulkSubmissionFlag:
      return self.__prepareTasksBulk(transBody, taskDict, owner, ownerGroup, ownerDN)
    # not a bulk submission
    return self.__prepareTasks(transBody, taskDict, owner, ownerGroup, ownerDN)

  def __prepareTasksBulk(self, transBody, taskDict, owner, ownerGroup, ownerDN):
    """ Prepare transformation tasks with a single job object for bulk submission

    :param str transBody: transformation job template
    :param dict taskDict: dictionary of per task parameters
    :param str owner: owner of the transformation
    :param str ownerGroup: group of the owner of the transformation
    :param str ownerDN: DN of the owner of the transformation

    :return: S_OK/S_ERROR with updated taskDict; on success the job object is stored
             in taskDict under the 'BulkJobObject' key
    """
    if taskDict:
      transID = taskDict.values()[0]['TransformationID']
    else:
      return S_OK({})

    method = '__prepareTasksBulk'
    startTime = time.time()

    # Prepare the bulk Job object with common parameters
    oJob = self.jobClass(transBody)
    self._logVerbose('Setting job owner:group to %s:%s' % (owner, ownerGroup),
                     transID=transID, method=method)
    oJob.setOwner(owner)
    oJob.setOwnerGroup(ownerGroup)
    oJob.setOwnerDN(ownerDN)

    try:
      site = oJob.workflow.findParameter('Site').getValue()
    except AttributeError:
      # findParameter() did not return a parameter object: no 'Site' in the workflow
      site = None
    jobType = oJob.workflow.findParameter('JobType').getValue()
    transGroup = str(transID).zfill(8)

    # Verify that the JOB_ID parameter is added to the workflow
    if not oJob.workflow.findParameter('JOB_ID'):
      oJob._addParameter(oJob.workflow, 'JOB_ID', 'string', '00000000', "Initial JOB_ID")

    if oJob.workflow.findParameter('PRODUCTION_ID'):
      oJob._setParamValue('PRODUCTION_ID', str(transID).zfill(8))  # pylint: disable=protected-access
    else:
      oJob._addParameter(oJob.workflow,  # pylint: disable=protected-access
                         'PRODUCTION_ID', 'string', str(transID).zfill(8), "Production ID")
    oJob.setType(jobType)
    self._logVerbose('Adding default transformation group of %s' % (transGroup),
                     transID=transID, method=method)
    oJob.setJobGroup(transGroup)

    if int(transID) in [int(x) for x in self.opsH.getValue("Hospital/Transformations", [])]:
      self._handleHospital(oJob)

    # Collect per job parameters sequences
    paramSeqDict = {}
    # tasks must be sorted because we use bulk submission and we must find the correspondance
    for taskID in sorted(taskDict):
      paramsDict = taskDict[taskID]
      seqDict = {}

      if site is not None:
        paramsDict['Site'] = site
      paramsDict['JobType'] = jobType

      # Handle destination site
      sites = self._handleDestination(paramsDict)
      if not sites:
        self._logError('Could not get a list a sites', transID=transID, method=method)
        return S_ERROR(ETSUKN, "Can not evaluate destination site")
      else:
        self._logVerbose('Setting Site: ', str(sites), transID=transID, method=method)
        seqDict['Site'] = sites

      seqDict['JobName'] = self._transTaskName(transID, taskID)
      seqDict['JOB_ID'] = str(taskID).zfill(8)

      self._logDebug('TransID: %s, TaskID: %s, paramsDict: %s' % (transID, taskID, str(paramsDict)),
                     transID=transID, method=method)

      # Handle Input Data
      inputData = paramsDict.get('InputData')
      if inputData:
        if isinstance(inputData, basestring):
          # A string is taken as a ';' separated list, spaces are dropped
          inputData = inputData.replace(' ', '').split(';')
        self._logVerbose('Setting input data to %s' % inputData, transID=transID, method=method)
        seqDict['InputData'] = inputData
      elif paramSeqDict.get('InputData') is not None:
        # A previous task had input data while this one has none
        self._logError("Invalid mixture of jobs with and without input data")
        return S_ERROR(ETSDATA, "Invalid mixture of jobs with and without input data")

      # All other non-empty parameters are passed on as they are
      for paramName, paramValue in paramsDict.iteritems():
        if paramName not in ('InputData', 'Site', 'TargetSE'):
          if paramValue:
            self._logVerbose('Setting %s to %s' % (paramName, paramValue),
                             transID=transID, method=method)
            seqDict[paramName] = paramValue

      outputParameterList = []
      if self.outputDataModule:
        res = self.getOutputData({'Job': oJob._toXML(),  # pylint: disable=protected-access
                                  'TransformationID': transID,
                                  'TaskID': taskID,
                                  'InputData': inputData})
        if not res['OK']:
          self._logError("Failed to generate output data", res['Message'],
                         transID=transID, method=method)
          # NOTE(review): nothing is appended to the sequences for this task
          continue
        for name, output in res['Value'].iteritems():
          seqDict[name] = output
          outputParameterList.append(name)
          # The workflow parameter is a placeholder, replaced per job by the sequence value
          if oJob.workflow.findParameter(name):
            oJob._setParamValue(name, "%%(%s)s" % name)  # pylint: disable=protected-access
          else:
            oJob._addParameter(oJob.workflow,  # pylint: disable=protected-access
                               name, 'JDL', "%%(%s)s" % name, name)

      for pName, seq in seqDict.iteritems():
        paramSeqDict.setdefault(pName, []).append(seq)

    for paramName, paramSeq in paramSeqDict.iteritems():
      if paramName in ['JOB_ID', 'PRODUCTION_ID', 'InputData'] + outputParameterList:
        res = oJob.setParameterSequence(paramName, paramSeq, addToWorkflow=paramName)
      else:
        res = oJob.setParameterSequence(paramName, paramSeq)
      if not res['OK']:
        return res

    if taskDict:
      self._logInfo('Prepared %d tasks' % len(taskDict),
                    transID=transID, method=method, reftime=startTime)

    taskDict['BulkJobObject'] = oJob
    return S_OK(taskDict)

  def __prepareTasks(self, transBody, taskDict, owner, ownerGroup, ownerDN):
    """ Prepare transformation tasks with a job object per task

    :param str transBody: transformation job template
    :param dict taskDict: dictionary of per task parameters
    :param owner: owner of the transformation
    :param str ownerGroup: group of the owner of the transformation
    :param str ownerDN: DN of the owner of the transformation

    :return: S_OK/S_ERROR with updated taskDict: each task gets a 'TaskObject' entry,
             which is the job object, or '' when the job could not be prepared
    """
    if taskDict:
      transID = taskDict.values()[0]['TransformationID']
    else:
      return S_OK({})

    method = '__prepareTasks'
    startTime = time.time()

    oJobTemplate = self.jobClass(transBody)
    oJobTemplate.setOwner(owner)
    oJobTemplate.setOwnerGroup(ownerGroup)
    oJobTemplate.setOwnerDN(ownerDN)

    try:
      site = oJobTemplate.workflow.findParameter('Site').getValue()
    except AttributeError:
      # findParameter() did not return a parameter object: no 'Site' in the workflow
      site = None
    jobType = oJobTemplate.workflow.findParameter('JobType').getValue()

    # Update the template with common information.
    # taskDict is not empty here, so this is done once, before creating the first job
    self._logVerbose('Job owner:group to %s:%s' % (owner, ownerGroup),
                     transID=transID, method=method)
    transGroup = str(transID).zfill(8)
    self._logVerbose('Adding default transformation group of %s' % (transGroup),
                     transID=transID, method=method)
    oJobTemplate.setJobGroup(transGroup)
    if oJobTemplate.workflow.findParameter('PRODUCTION_ID'):
      oJobTemplate._setParamValue('PRODUCTION_ID', str(transID).zfill(8))  # pylint: disable=protected-access
    else:
      oJobTemplate._addParameter(oJobTemplate.workflow,  # pylint: disable=protected-access
                                 'PRODUCTION_ID', 'string', str(transID).zfill(8), "Production ID")
    if not oJobTemplate.workflow.findParameter('JOB_ID'):
      oJobTemplate._addParameter(oJobTemplate.workflow,  # pylint: disable=protected-access
                                 'JOB_ID', 'string', '00000000', "Initial JOB_ID")

    # The same for all tasks: look it up once rather than for every task
    hospitalTrans = [int(x) for x in self.opsH.getValue("Hospital/Transformations", [])]
    isHospital = int(transID) in hospitalTrans

    getOutputDataTiming = 0.
    for taskID, paramsDict in taskDict.iteritems():
      # Create a job for each task and add it to the taskDict
      if site is not None:
        paramsDict['Site'] = site
      paramsDict['JobType'] = jobType

      # Now create the job from the template
      oJob = copy.deepcopy(oJobTemplate)
      constructedName = self._transTaskName(transID, taskID)
      self._logVerbose('Setting task name to %s' % constructedName,
                       transID=transID, method=method)
      oJob.setName(constructedName)
      oJob._setParamValue('JOB_ID', str(taskID).zfill(8))  # pylint: disable=protected-access
      # NOTE(review): this is never updated, so the output data module always gets
      # 'InputData': None here, while the bulk preparation passes the task input data.
      # Confirm which one is intended before changing it.
      inputData = None

      self._logDebug('TransID: %s, TaskID: %s, paramsDict: %s' % (transID, taskID, str(paramsDict)),
                     transID=transID, method=method)

      # These helper functions do the real job
      sites = self._handleDestination(paramsDict)
      if not sites:
        self._logError('Could not get a list a sites', transID=transID, method=method)
        paramsDict['TaskObject'] = ''
        continue
      else:
        self._logDebug('Setting Site: ', str(sites), transID=transID, method=method)
        res = oJob.setDestination(sites)
        if not res['OK']:
          self._logError('Could not set the site: %s' % res['Message'],
                         transID=transID, method=method)
          paramsDict['TaskObject'] = ''
          continue

      self._handleInputs(oJob, paramsDict)
      self._handleRest(oJob, paramsDict)

      if isHospital:
        self._handleHospital(oJob)

      # An empty 'TaskObject' marks the task as not prepared, until proven otherwise
      paramsDict['TaskObject'] = ''
      if self.outputDataModule:
        getOutputDataTiming -= time.time()
        res = self.getOutputData({'Job': oJob._toXML(),  # pylint: disable=protected-access
                                  'TransformationID': transID,
                                  'TaskID': taskID,
                                  'InputData': inputData})
        getOutputDataTiming += time.time()
        if not res['OK']:
          self._logError("Failed to generate output data", res['Message'],
                         transID=transID, method=method)
          continue
        for name, output in res['Value'].iteritems():
          oJob._addJDLParameter(name, ';'.join(output))  # pylint: disable=protected-access
      paramsDict['TaskObject'] = oJob

    if taskDict:
      self._logVerbose('Average getOutputData time: %.1f per task' % (getOutputDataTiming / len(taskDict)),
                       transID=transID, method=method)
      self._logInfo('Prepared %d tasks' % len(taskDict),
                    transID=transID, method=method, reftime=startTime)
    return S_OK(taskDict)

  #############################################################################

  def _handleDestination(self, paramsDict):
    """ Handle Sites and TargetSE in the parameters

    :param dict paramsDict: parameters of the task; 'Site', if present, is a ';' separated string

    :return: list of destination sites. An empty list is returned when the destination
             plugin object can not be created, so that callers testing "if not sites"
             treat it as a failure.
    """
    sites = ['ANY']
    requestedSites = paramsDict.get('Site')
    if requestedSites:
      # 'Site' comes from the XML and therefore is ; separated
      sites = fromChar(requestedSites, sepChar=';')

    if self.destinationPlugin_o:
      destinationPlugin_o = self.destinationPlugin_o
    else:
      res = self.__generatePluginObject(self.destinationPlugin)
      if not res['OK']:
        self._logFatal("Could not generate a destination plugin object")
        # Returning the S_ERROR structure here would be taken as a (true) list of sites
        return []
      destinationPlugin_o = res['Value']
      self.destinationPlugin_o = destinationPlugin_o

    destinationPlugin_o.setParameters(paramsDict)
    destSites = destinationPlugin_o.run()
    if not destSites:
      return sites

    # Now we need to make the AND with the sites, if defined
    if sites != ['ANY']:
      # Need to get the AND
      destSites &= set(sites)

    return list(destSites)

  def _handleInputs(self, oJob, paramsDict):
    """ set job inputs (+ metadata)

    :param oJob: job object, on which setInputData() is called
    :param dict paramsDict: parameters of the task, 'TransformationID' is mandatory

    A failure to set the input data is logged, nothing is returned.
    """
    inputData = paramsDict.get('InputData')
    transID = paramsDict['TransformationID']
    if inputData:
      self._logVerbose('Setting input data to %s' % inputData,
                       transID=transID, method='_handleInputs')
      res = oJob.setInputData(inputData)
      if not res['OK']:
        self._logError("Could not set the inputs: %s" % res['Message'],
                       transID=transID, method='_handleInputs')

  def _handleRest(self, oJob, paramsDict):
    """ add as JDL parameters all the other parameters that are not for inputs or destination

    :param oJob: job object, on which the JDL parameters are added
    :param dict paramsDict: parameters of the task, 'TransformationID' is mandatory.
                            Parameters with an empty value are skipped
    """
    transID = paramsDict['TransformationID']
    for paramName, paramValue in paramsDict.iteritems():
      if paramName not in ('InputData', 'Site', 'TargetSE'):
        if paramValue:
          self._logDebug('Setting %s to %s' % (paramName, paramValue),
                         transID=transID, method='_handleRest')
          oJob._addJDLParameter(paramName, paramValue)  # pylint: disable=protected-access

  def _handleHospital(self, oJob):
    """ Optional handle of hospital jobs: the job type is set to 'Hospital', input data are
        downloaded, and the destination is the site (and, if defined, the CEs) found in the
        "Hospital" section of the Operations helper.
    """
    oJob.setType('Hospital')
    oJob.setInputDataPolicy('download', dataScheduling=False)
    hospitalSite = self.opsH.getValue("Hospital/HospitalSite", 'DIRAC.JobDebugger.ch')
    oJob.setDestination(hospitalSite)
    hospitalCEs = self.opsH.getValue("Hospital/HospitalCEs", [])
    if hospitalCEs:
      oJob._addJDLParameter('GridCE', hospitalCEs)  # pylint: disable=protected-access

  def __generatePluginObject(self, plugin):
    """ This simply instantiates the TaskManagerPlugin class with the relevant plugin name

    :param str plugin: name of the plugin
    :return: S_OK(plugin object)/S_ERROR
    """
    method = '__generatePluginObject'
    try:
      plugModule = __import__(self.pluginLocation, globals(), locals(), ['TaskManagerPlugin'])
    except ImportError as e:
      self._logException("Failed to import 'TaskManagerPlugin' %s: %s" % (plugin, e), method=method)
      return S_ERROR("Failed to import 'TaskManagerPlugin' %s: %s" % (plugin, e))
    try:
      plugin_o = getattr(plugModule, 'TaskManagerPlugin')('%s' % plugin, operationsHelper=self.opsH)
      return S_OK(plugin_o)
    except AttributeError as e:
      self._logException("Failed to create %s(): %s." % (plugin, e), method=method)
      return S_ERROR("Failed to create %s(): %s." % (plugin, e))

  #############################################################################

  def getOutputData(self, paramDict):
    """ Get the list of job output LFNs from the provided plugin

    :param dict paramDict: set as "paramDict" attribute of the module before executing it
    :return: what the execute() method of the module returns, or the error from the
             module factory if the module can not be created
    """
    if not self.outputDataModule_o:
      # Create the module object (only once, it is then cached)
      moduleFactory = ModuleFactory()

      moduleInstance = moduleFactory.getModule(self.outputDataModule, None)
      if not moduleInstance['OK']:
        return moduleInstance
      self.outputDataModule_o = moduleInstance['Value']
    # This is the "argument" to the module, set it and then execute
    self.outputDataModule_o.paramDict = paramDict
    return self.outputDataModule_o.execute()

  def submitTransformationTasks(self, taskDict):
    """ Submit the tasks: in bulk if taskDict holds a 'BulkJobObject', one by one otherwise

    :param dict taskDict: tasks, as returned by prepareTransformationTasks
    :return: S_OK/S_ERROR with updated taskDict
    """
    if 'BulkJobObject' in taskDict:
      return self.__submitTransformationTasksBulk(taskDict)
    return self.__submitTransformationTasks(taskDict)

  def __submitTransformationTasksBulk(self, taskDict):
    """ Submit jobs in one go with one parametric job

    :param dict taskDict: tasks, with the job object under the 'BulkJobObject' key.
                          This key is removed from taskDict
    :return: S_OK(taskDict) where each task got 'ExternalID' and 'Success', or S_ERROR
    """
    if not taskDict:
      return S_OK(taskDict)
    startTime = time.time()

    method = '__submitTransformationTasksBulk'

    oJob = taskDict.pop('BulkJobObject')
    if not taskDict:
      # There was nothing but the bulk job object: no task to submit
      return S_OK(taskDict)
    # we can only do this, once the job has been popped, or we _might_ crash
    transID = taskDict.values()[0]['TransformationID']
    if oJob is None:
      self._logError('no bulk Job object found', transID=transID, method=method)
      return S_ERROR(ETSUKN, 'No bulk job object provided for submission')

    result = self.submitTaskToExternal(oJob)
    if not result['OK']:
      self._logError('Failed to submit tasks to external', transID=transID, method=method)
      return result

    jobIDList = result['Value']
    if len(jobIDList) != len(taskDict):
      # The job IDs can not be matched to the tasks
      for task in taskDict.values():
        task['Success'] = False
      return S_ERROR(ETSUKN, 'Submitted less number of jobs than requested tasks')
    # Get back correspondance with tasks sorted by ID
    for jobID, taskID in zip(jobIDList, sorted(taskDict)):
      taskDict[taskID]['ExternalID'] = jobID
      taskDict[taskID]['Success'] = True

    submitted = len(jobIDList)
    self._logInfo('Submitted %d tasks to WMS in %.1f seconds' % (submitted, time.time() - startTime),
                  transID=transID, method=method)
    return S_OK(taskDict)

  def __submitTransformationTasks(self, taskDict):
    """ Submit jobs one by one

    :param dict taskDict: tasks, each one with its job in 'TaskObject' (empty if not prepared)
    :return: S_OK(taskDict) where each task got 'Success' and, if submitted, 'ExternalID'
    """
    method = '__submitTransformationTasks'
    submitted = 0
    failed = 0
    startTime = time.time()
    # Only used for logging, which happens only if at least one task was looped on
    transID = None
    for task in taskDict.itervalues():
      transID = task['TransformationID']
      if not task['TaskObject']:
        # The preparation of this task failed
        task['Success'] = False
        failed += 1
        continue
      res = self.submitTaskToExternal(task['TaskObject'])
      if res['OK']:
        task['ExternalID'] = res['Value']
        task['Success'] = True
        submitted += 1
      else:
        self._logError("Failed to submit task to WMS", res['Message'],
                       transID=transID, method=method)
        task['Success'] = False
        failed += 1
    if submitted:
      self._logInfo('Submitted %d tasks to WMS in %.1f seconds' % (submitted, time.time() - startTime),
                    transID=transID, method=method)
    if failed:
      self._logError('Failed to submit %d tasks to WMS.' % (failed),
                     transID=transID, method=method)
    return S_OK(taskDict)

  def submitTaskToExternal(self, job):
    """ Submits a single job (which can be a bulk one) to the WMS.

    :param job: a job description (string), from which a job object is created,
                or an instance of the job class
    :return: S_ERROR if no job object can be obtained, otherwise the result of the
             submitJob() call of the submission client
    """
    if isinstance(job, basestring):
      try:
        oJob = self.jobClass(job)
      except Exception as x:  # pylint: disable=broad-except
        self._logException("Failed to create job object", '', x)
        return S_ERROR("Failed to create job object")
    elif isinstance(job, self.jobClass):
      oJob = job
    else:
      self._logError("No valid job description found")
      return S_ERROR("No valid job description found")

    workflowFileObject = StringIO.StringIO(oJob._toXML())  # pylint: disable=protected-access
    jdl = oJob._toJDL(jobDescriptionObject=workflowFileObject)  # pylint: disable=protected-access
    return self.submissionClient.submitJob(jdl, workflowFileObject)

  def updateTransformationReservedTasks(self, taskDicts):
    """ Look in the WMS for the jobs whose name corresponds to the given tasks

    :param list taskDicts: dictionaries with (at least) 'TransformationID' and 'TaskID'
    :return: S_OK with a dictionary with 'NoTasks' (list of job names for which no job
             summary was obtained) and 'TaskNameIDs' (dictionary job name -> WMS job ID),
             or S_ERROR if the jobs can not be queried
    """
    transID = None
    jobNames = [self._transTaskName(taskDict['TransformationID'], taskDict['TaskID'])
                for taskDict in taskDicts]
    res = self.jobMonitoringClient.getJobs({'JobName': jobNames})
    if not res['OK']:
      self._logError("Failed to get task from WMS", res['Message'],
                     transID=transID, method='updateTransformationReservedTasks')
      return res
    jobNameIDs = {}
    for wmsID in res['Value']:
      summary = self.jobMonitoringClient.getJobPrimarySummary(int(wmsID))
      if not summary['OK']:
        self._logWarn("Failed to get task summary from WMS", summary['Message'],
                      transID=transID, method='updateTransformationReservedTasks')
      else:
        jobNameIDs[summary['Value']['JobName']] = int(wmsID)

    noTask = list(set(jobNames) - set(jobNameIDs))
    return S_OK({'NoTasks': noTask, 'TaskNameIDs': jobNameIDs})

  def getSubmittedTaskStatus(self, taskDicts):
    """ Check the status of a list of tasks and return lists of taskIDs for each new status

    :param list taskDicts: dictionaries with 'TaskID', 'ExternalID', 'ExternalStatus' and
                           (the first one at least) 'TransformationID'
    :return: S_OK with a dictionary new status -> list of task IDs, holding only the tasks
             whose status changed, or S_ERROR if the WMS can not be queried
    """
    method = 'getSubmittedTaskStatus'

    if taskDicts:
      # Tasks with a null external ID are not looked for
      wmsIDs = [int(taskDict['ExternalID']) for taskDict in taskDicts if int(taskDict['ExternalID'])]
      transID = taskDicts[0]['TransformationID']
    else:
      return S_OK({})
    res = self.jobMonitoringClient.getJobsStatus(wmsIDs)
    if not res['OK']:
      self._logWarn("Failed to get job status from the WMS system",
                    transID=transID, method=method)
      return res
    statusDict = res['Value']
    updateDict = {}
    for taskDict in taskDicts:
      taskID = taskDict['TaskID']
      wmsID = int(taskDict['ExternalID'])
      if not wmsID:
        continue
      oldStatus = taskDict['ExternalStatus']
      # A job that is not reported by the WMS is considered as removed
      newStatus = statusDict.get(wmsID, {}).get('Status', 'Removed')
      if oldStatus != newStatus:
        if newStatus == "Removed":
          self._logVerbose('Production/Job %d/%d removed from WMS while it is in %s status' %
                           (transID, taskID, oldStatus), transID=transID, method=method)
          newStatus = "Failed"
        self._logVerbose('Setting job status for Production/Job %d/%d to %s' % (transID, taskID, newStatus),
                         transID=transID, method=method)
        updateDict.setdefault(newStatus, []).append(taskID)
    return S_OK(updateDict)

  def getSubmittedFileStatus(self, fileDicts):
    """ Check the status of a list of files and return the new status of each LFN

    :param list fileDicts: dictionaries with 'TransformationID', 'TaskID', 'LFN' and 'Status'
    :return: S_OK with a dictionary LFN -> new status, holding only the files whose status
             has to change, or S_ERROR if the WMS can not be queried
    """
    if not fileDicts:
      return S_OK({})

    method = 'getSubmittedFileStatus'

    # All files are from the same transformation
    transID = fileDicts[0]['TransformationID']
    taskFiles = {}
    for fileDict in fileDicts:
      jobName = self._transTaskName(transID, fileDict['TaskID'])
      taskFiles.setdefault(jobName, {})[fileDict['LFN']] = fileDict['Status']

    res = self.updateTransformationReservedTasks(fileDicts)
    if not res['OK']:
      self._logWarn("Failed to obtain taskIDs for files",
                    transID=transID, method=method)
      return res
    noTasks = res['Value']['NoTasks']
    taskNameIDs = res['Value']['TaskNameIDs']

    updateDict = {}
    # Files of tasks for which no job was found go back to 'Unused'
    for jobName in noTasks:
      for lfn, oldStatus in taskFiles[jobName].iteritems():
        if oldStatus != 'Unused':
          updateDict[lfn] = 'Unused'

    res = self.jobMonitoringClient.getJobsStatus(taskNameIDs.values())
    if not res['OK']:
      self._logWarn("Failed to get job status from the WMS system",
                    transID=transID, method=method)
      return res
    statusDict = res['Value']
    for jobName, wmsID in taskNameIDs.iteritems():
      jobStatus = statusDict.get(wmsID, {}).get('Status')
      # Any other job status leaves the status of the files untouched
      newFileStatus = {'Done': 'Processed',
                       'Completed': 'Processed',
                       'Failed': 'Unused'}.get(jobStatus)
      if newFileStatus:
        for lfn, oldStatus in taskFiles[jobName].iteritems():
          if newFileStatus != oldStatus:
            updateDict[lfn] = newFileStatus
    return S_OK(updateDict)
def export_getTransformationSummaryWeb(self, selectDict, sortList, startItem, maxItems):
  """ Get the summary of the transformation information for a given page in the generic format

  :param dict selectDict: selection conditions. 'CreationDate', 'FromDate' and 'ToDate',
                          when they have a value, are removed from it and used as time window
  :param list sortList: sorting instructions, only the first (attribute, order) pair is used
  :param int startItem: index of the first transformation of the page
  :param int maxItems: maximum number of transformations in the page

  :return: S_OK with a dictionary with 'TotalRecords', 'ParameterNames' and, if there is any
           transformation selected, 'Records' (for the requested page, each completed with
           the job and file counters) and 'Extras' (number of transformations per status,
           for the page). S_ERROR if startItem is out of range.
  """
  # Obtain the timing information from the selectDict
  last_update = selectDict.get('CreationDate', None)
  if last_update:
    del selectDict['CreationDate']
  fromDate = selectDict.get('FromDate', None)
  if fromDate:
    del selectDict['FromDate']
  if not fromDate:
    fromDate = last_update
  toDate = selectDict.get('ToDate', None)
  if toDate:
    del selectDict['ToDate']

  # Sorting instructions. Only one for the moment.
  if sortList:
    orderAttribute = sortList[0][0] + ":" + sortList[0][1]
  else:
    orderAttribute = None

  # Get the transformations that match the selection
  res = database.getTransformations(condDict=selectDict, older=toDate, newer=fromDate,
                                    orderAttribute=orderAttribute)
  if not res['OK']:
    return self._parseRes(res)

  # Prepare the standard structure now within the resultDict dictionary
  resultDict = {}
  trList = res['Records']
  # Create the total records entry
  nTrans = len(trList)
  resultDict['TotalRecords'] = nTrans

  paramNames = res['ParameterNames']
  # The job states to add to the ParameterNames entry
  taskStateNames = ['TotalCreated', 'Created', 'Running', 'Submitted', 'Failed', 'Waiting', 'Done',
                    'Completed', 'Stalled', 'Killed', 'Staging', 'Checking', 'Rescheduled', 'Scheduled']
  # The file states to add to the ParameterNames entry
  fileStateNames = ['PercentProcessed', 'Processed', 'Unused', 'Assigned', 'Total', 'Problematic',
                    'ApplicationCrash', 'MaxReset']
  # Create the ParameterNames entry as a new list: the names that came with the records
  # are left untouched, as they are used below to interpret each record
  resultDict['ParameterNames'] = (paramNames +
                                  ['Jobs_' + x for x in taskStateNames] +
                                  ['Files_' + x for x in fileStateNames])

  # Get the transformations which are within the selected window
  if nTrans == 0:
    return S_OK(resultDict)
  ini = startItem
  last = ini + maxItems
  if ini >= nTrans:
    return S_ERROR('Item number out of range')
  if last > nTrans:
    last = nTrans
  transList = trList[ini:last]

  statusDict = {}
  opsHelper = Operations()
  # Lower case, as this is what the (lower cased) transformation type is compared with
  extendableTranfs = [transType.lower() for transType in
                      opsHelper.getValue('Transformations/ExtendableTransfTypes',
                                         ['Simulation', 'MCsimulation'])]
  givenUpFileStatus = opsHelper.getValue('Transformations/GivenUpFileStatus',
                                         ['NotProcessed', 'Removed', 'MissingInFC', 'MissingLFC'])
  problematicStatuses = opsHelper.getValue('Transformations/ProblematicStatuses',
                                           ['Problematic'])

  # Add specific information for each selected transformation
  for trans in transList:
    transDict = dict(zip(paramNames, trans))

    # Update the status counters
    status = transDict['Status']
    statusDict[status] = statusDict.setdefault(status, 0) + 1

    # Get the statistics on the number of jobs for the transformation
    transID = transDict['TransformationID']
    res = database.getTransformationTaskStats(transID)
    taskDict = {}
    if res['OK'] and res['Value']:
      taskDict = res['Value']
    for state in taskStateNames:
      trans.append(taskDict.get(state, 0))

    # Get the statistics for the number of files for the transformation
    fileDict = {}
    transType = transDict['Type']
    if transType.lower() in extendableTranfs:
      # No percentage for the extendable transformations
      fileDict['PercentProcessed'] = '-'
    else:
      res = database.getTransformationStats(transID)
      if res['OK']:
        fileDict = res['Value']
        # The files that are given up are not to be processed
        total = fileDict.get('Total', 0)
        for stat in givenUpFileStatus:
          total -= fileDict.get(stat, 0)
        processed = fileDict.get('Processed', 0)
        # Percentage with one decimal, truncated (not rounded)
        fileDict['PercentProcessed'] = "%.1f" % (int(processed * 1000. / total) / 10.) if total else 0.
    problematic = 0
    for stat in problematicStatuses:
      problematic += fileDict.get(stat, 0)
    fileDict['Problematic'] = problematic
    for state in fileStateNames:
      trans.append(fileDict.get(state, 0))

  resultDict['Records'] = transList
  resultDict['Extras'] = statusDict
  return S_OK(resultDict)