class ProdValidator(object):
    """Checks performed before a transformation is attached to a production.

    Every public method returns a DIRAC-style result dictionary built with
    ``S_OK`` / ``S_ERROR`` (or the unmodified error result of the client call
    that failed).
    """

    def __init__(self):
        # Client used for all Transformation System lookups below.
        self.transClient = TransformationClient()

    def checkTransStatus(self, transID):
        """Check that the transformation status allows adding it to a production.

        'New' is the only valid status.

        :param int transID: the TransformationID
        :return: S_OK() if the status is 'New', S_ERROR otherwise (or the
                 failed result of the status lookup)
        """
        res = self.transClient.getTransformationParameters(transID, 'Status')
        if not res['OK']:
            return res
        status = res['Value']
        if status != 'New':
            return S_ERROR(
                "checkTransStatus failed : Invalid transformation status: %s" % status)
        return S_OK()

    def checkTransDependency(self, transID, parentTransID):
        """Check that the transformation and the parent transformation are linked.

        The link is established by matching the input meta query of the
        transformation against the output meta query of its parent.

        :param int transID: the TransformationID
        :param int parentTransID: the parent TransformationID
        :return: S_OK() when the queries match, S_ERROR otherwise
        """
        res = self.transClient.getTransformationMetaQuery(transID, 'Input')
        if not res['OK']:
            return res
        inputquery = res['Value']
        if not inputquery:
            return S_ERROR("No InputMetaQuery defined for transformation %s" % transID)

        res = self.transClient.getTransformationMetaQuery(parentTransID, 'Output')
        if not res['OK']:
            return res
        parentoutputquery = res['Value']
        if not parentoutputquery:
            return S_ERROR(
                "No OutputMetaQuery defined for parent transformation %s" % parentTransID)

        # Check the matching between inputquery and parent outputmeta query.
        # Currently very simplistic: only expressions with the "=" and "in"
        # operators are supported.
        gLogger.notice("Applying checkMatchQuery")
        res = self.checkMatchQuery(inputquery, parentoutputquery)
        if not res['OK']:
            gLogger.error("checkMatchQuery failed")
            return res
        if not res['Value']:
            return S_ERROR("checkMatchQuery result is False")
        return S_OK()

    def checkMatchQuery(self, mq, mqParent):
        """Check the logical intersection between the two meta queries.

        :param dict mq: the MetaQuery to be checked against mqParent
        :param dict mqParent: the parent MetaQuery to be checked against mq
        :return: S_OK(True) when every metadata of ``mq`` matches the parent,
                 S_OK(False) when a value does not match, S_ERROR when a
                 metadata is unknown, unsupported or missing from the parent
        """
        # Get the metadata types defined in the catalog
        catalog = FileCatalog()
        res = catalog.getMetadataFields()
        if not res['OK']:
            gLogger.error("Error in getMetadataFields: %s" % res['Message'])
            return res
        if not res['Value']:
            # This is an error condition: report it as one instead of handing
            # back the (successful) catalog result.
            gLogger.error("Error: no metadata fields defined")
            return S_ERROR("No metadata fields defined")

        MetaTypeDict = res['Value']['FileMetaFields']
        MetaTypeDict.update(res['Value']['DirectoryMetaFields'])

        res = self.checkformatQuery(mq)
        if not res['OK']:
            return res
        MetaQueryDict = res['Value']

        res = self.checkformatQuery(mqParent)
        if not res['OK']:
            return res
        ParentMetaQueryDict = res['Value']

        for meta, value in MetaQueryDict.items():
            if meta not in MetaTypeDict:
                return S_ERROR('Metadata %s is not defined in the Catalog' % meta)
            mtype = MetaTypeDict[meta]
            if mtype.lower() not in ['varchar(128)', 'int', 'float']:
                return S_ERROR('Metatype %s is not supported' % mtype.lower())
            if meta not in ParentMetaQueryDict:
                return S_ERROR('Metadata %s is not in parent transformation query' % meta)
            if not self.compareValues(value, ParentMetaQueryDict[meta]):
                msg = "Metadata values %s do not match with %s" % (
                    value, ParentMetaQueryDict[meta])
                gLogger.error(msg)
                return S_OK(False)

        return S_OK(True)

    def checkformatQuery(self, MetaQueryDict):
        """Check the query format and normalise every entry for uniform treatment.

        Plain values and single-valued conditions are rewritten in place as
        ``{"in": [...]}``; conditions whose operand is already a list are
        left untouched.

        :param dict MetaQueryDict: a dictionary of the MetaQuery (modified in place)
        :return: S_OK(MetaQueryDict), or S_ERROR for an unsupported operation
                 or an empty condition dictionary
        """
        # Iterate over a snapshot: entries are reassigned while looping.
        for meta, value in list(MetaQueryDict.items()):
            if isinstance(value, dict):
                if not value:
                    return S_ERROR('Empty query for metadata %s' % meta)
                # dict views are not subscriptable on Python 3, so take the
                # first item through an iterator instead of keys()[0].
                operation, operand = next(iter(value.items()))
                if operation not in ['=', 'in']:
                    return S_ERROR('Operation %s is not supported' % operation)
                if not isinstance(operand, list):
                    MetaQueryDict[meta] = {"in": list(value.values())}
            else:
                MetaQueryDict[meta] = {"in": [value]}

        return S_OK(MetaQueryDict)

    @staticmethod
    def _firstValue(queryDict):
        """Return the first value stored in a single-condition query dictionary."""
        return next(iter(queryDict.values()))

    def compareValues(self, value, parentValue):
        """Very simple comparison. To be improved.

        :param dict value: condition holding the metadata values to compare
        :param dict parentValue: condition holding the parent metadata values
        :return: True when one set of values is a subset of the other
        """
        childValues = set(self._firstValue(value))
        parentValues = set(self._firstValue(parentValue))
        return childValues.issubset(parentValues) or parentValues.issubset(childValues)
class TransformationCLI(CLI, API):
    """Interactive console for the Transformation System.

    Every ``do_<name>`` method implements the console command ``<name>``; its
    docstring is the help text shown by ``help`` / ``helpall``.
    """

    def __init__(self):
        self.transClient = TransformationClient()
        # Width of the key column used by printPair
        self.indentSpace = 4
        CLI.__init__(self)
        API.__init__(self)

    def printPair(self, key, value, separator=":"):
        """Print ``key <separator> value``, indenting continuation lines of value.

        A trailing blank line (typical for docstrings) is not printed.

        :param str key: label printed in the first column
        :param str value: possibly multi-line text
        :param str separator: text placed between key and value
        """
        lines = value.split("\n")
        padding = " " * (self.indentSpace - len(key))
        print("%s%s%s %s" % (key, padding, separator, lines[0].strip()))
        continuation = lines[1:]
        # Only discard the last line when it really is empty, so that the
        # final line of a text without trailing newline is not lost.
        if continuation and not continuation[-1].strip():
            continuation.pop()
        for line in continuation:
            print("%s %s" % (" " * self.indentSpace, line.strip()))

    def do_help(self, args):
        """
        Default version of the help command
           Usage: help <command>
           OR use helpall to see description for all commands
        """
        CLI.do_help(self, args)

    def _commandHelp(self, command):
        """Return the help text of ``do_<command>`` without leading whitespace.

        :raises AttributeError: if the command does not exist
        """
        doc = getattr(self, "do_%s" % command).__doc__
        # Commands without a docstring simply have an empty help text.
        return (doc or "").lstrip()

    # overriding default help command
    def do_helpall(self, args):
        """
        Shows help information
            Usage: helpall <command>
            If no command is specified all commands are shown
        """
        words = args.split()
        if not words:
            print("\nAvailable commands:\n")
            for attribute in sorted(dir(self)):
                if attribute.startswith("do_"):
                    command = attribute[3:]
                    self.printPair(command, self._commandHelp(command))
                    print("")
            return

        command = words[0]
        try:
            helpText = self._commandHelp(command)
        except AttributeError:
            print("There's no such %s command" % command)
            return
        self.printPair(command, helpText)

    def do_shell(self, args):
        """Execute a shell command

           usage !<shell_command>
        """
        res = shellCall(0, args)
        if not res['OK']:
            print(res['Message'])
            return
        returnCode, stdOut, stdErr = res['Value']
        if returnCode != 0:
            # A successful call with a failing command carries no 'Message';
            # report the exit status and still show the command output.
            print("Command exited with status %s" % returnCode)
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Checks if the number of parameters correct

        :param str args: raw argument string
        :param int num: minimum number of arguments required
        :return: (list of arguments, count), or (False, count) when too few
        """
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct."
                  % (length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument

        :param str id_or_name: transformation ID or name
        :return: an int when the argument is all digits, the string otherwise
        """
        if id_or_name.isdigit():
            return int(id_or_name)  # it looks like an id
        return id_or_name

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

           usage: getall [Status] [Status]
        """
        oTrans = Transformation()
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getAllByUser(self, args):
        """Get all transformations created by a given user

        The first argument is the authorDN or username. The authorDN
        is preferred: it need to be inside quotes because contains
        white spaces. Only authorDN should be quoted.

        When the username is provided instead,
        the authorDN is retrieved from the uploaded proxy,
        so that the retrieved transformations are those created by
        the user who uploaded that proxy: that user could be different
        that the username provided to the function.

           usage: getAllByUser authorDN or username [Status] [Status]
        """
        quotes = ("'", '"')
        oTrans = Transformation()
        argss = args.split()
        username = ""
        author = ""
        status = []
        if not argss:
            print(self.do_getAllByUser.__doc__)
            return

        # An unquoted DN (contains '=') cannot be parsed reliably
        if '=' in argss[0] and argss[0][0] not in quotes:
            print("AuthorDN need to be quoted (just quote that argument)")
            return

        if argss[0][0] in quotes:  # authorDN given
            author = argss[0]
            status_idx = 1
            # Only glue further tokens to the DN when the first token did not
            # already close the quote; otherwise the statuses would be
            # swallowed into the DN.
            closed = len(author) > 1 and author[-1] in quotes
            if not closed:
                for arg in argss[1:]:
                    author += ' ' + arg
                    status_idx += 1
                    if arg[-1] in quotes:
                        break
            # At this point we should have something like 'author'
            if len(author) < 2 or author[-1] not in quotes:
                print("AuthorDN need to be quoted (just quote that argument)")
                return
            author = author[1:-1]  # throw away the quotes
            # the rest are the requested status
            status = argss[status_idx:]
        else:  # username given
            username = argss[0]
            status = argss[1:]

        oTrans.getTransformationsByUser(authorDN=author, userName=username,
                                        transStatus=status, printOutput=True)

    def do_summaryTransformations(self, args):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J') refers to files (jobs).
        Proc. stand for processed.

            Usage: summaryTransformations <ProdID> [<ProdID> ...]
        """
        argss = args.split()
        if not argss:
            print(self.do_summaryTransformations.__doc__)
            return

        oTrans = Transformation()
        oTrans.getSummaryTransformations(transID=argss)

    def do_getStatus(self, args):
        """Get transformation details

           usage: getStatus <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.transClient.getTransformation(transName)
            if not res['OK']:
                print("Getting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s: %s" % (transName, res['Value']['Status']))

    def do_setStatus(self, args):
        """Set transformation status

           usage: setStatus <Status> <transName|ID>
           Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and status not supplied")
            return
        status = argss[0]
        for transName in argss[1:]:
            res = self.transClient.setTransformationParameter(transName, 'Status', status)
            if not res['OK']:
                print("Setting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s set to %s" % (transName, status))

    def do_start(self, args):
        """Start transformation

           usage: start <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.transClient.setTransformationParameter(transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName, res['Message']))
                continue
            res = self.transClient.setTransformationParameter(transName, 'AgentType', 'Automatic')
            if not res['OK']:
                print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation

           usage: stop <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.transClient.setTransformationParameter(transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation

           usage: flush <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.transClient.setTransformationParameter(transName, 'Status', 'Flush')
            if not res['OK']:
                print("Flushing of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s flushing" % transName)

    def do_get(self, args):
        """Get transformation definition

           usage: get <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.transClient.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            # The body is shown by getBody; tolerate it being absent.
            res['Value'].pop('Body', None)
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

           usage: getBody <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.transClient.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            print(res['Value']['Body'])

    def do_getFileStat(self, args):
        """Get transformation file statistics

           usage: getFileStat <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.transClient.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName, res['Message']))
        else:
            # Drop the aggregate entry if present; only per-status counts are shown.
            res['Value'].pop('Total', None)
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

           usage: modMask <mask> <transName|ID>
        """
        argss = args.split()
        # Both the mask and at least one transformation are needed; with a
        # single argument the command used to do nothing silently.
        if len(argss) < 2:
            print("mask and transformation not supplied")
            return
        mask = argss[0]
        for transName in argss[1:]:
            res = self.transClient.setTransformationParameter(transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

           usage: getFiles <transName|ID> [Status] [Status]
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        status = argss[1:]
        res = self.transClient.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return

        selectDict = {'TransformationID': res['Value']['TransformationID']}
        if status:
            selectDict['Status'] = status
        res = self.transClient.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            self._printFormattedDictList(res['Value'],
                                         ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                         'LFN', 'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

           usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]

        res = self.transClient.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return

        selectDict = {'TransformationID': res['Value']['TransformationID']}
        res = self.transClient.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            wanted = set(lfns)
            filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in wanted]
            if filesList:
                self._printFormattedDictList(filesList,
                                             ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                             'LFN', 'LFN')
            else:
                print("Could not find any LFN in", lfns, "for transformation", transName)
        else:
            print("No files found")

    def do_getOutputFiles(self, args):
        """Get output files for the transformation

           usage: getOutputFiles <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.transClient.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return

        fc = FileCatalog()
        meta = {'ProdID': transName}
        res = fc.findFilesByMetadata(meta)
        if not res['OK']:
            print(res['Message'])
            return
        if not len(res['Value']) > 0:
            # %s rather than int(): the transformation may be given by name.
            print('No output files yet for transformation %s' % transName)
            return
        for lfn in res['Value']:
            print(lfn)

    def do_getInputDataQuery(self, args):
        """Get input data query for the transformation

           usage: getInputDataQuery <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.transClient.getTransformationMetaQuery(transName, 'Input')
        if not res['OK']:
            print("Failed to get transformation input data query: %s" % res['Message'])
        else:
            print(res['Value'])

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

           usage: setFileStatus <transName|ID> <lfn> <status>
        """
        argss = args.split()
        if len(argss) != 3:
            print("transformation file and status not supplied")
            return
        transName, lfn, status = argss
        res = self.transClient.setFileStatusForTransformation(transName, status, [lfn])
        if not res['OK']:
            print("Failed to update file status: %s" % res['Message'])
        else:
            print("Updated file status to %s" % status)

    def _printResetResult(self, res, nFiles):
        """Report the outcome of a reset of ``nFiles`` files to 'Unused'."""
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
            return
        failed = res['Value'].get('Failed') if 'Failed' in res['Value'] else None
        if failed:
            print("Could not reset some files: ")
            for lfn, reason in failed.items():
                print(lfn, reason)
        else:
            print("Updated file statuses to 'Unused' for %d file(s)" % nFiles)

    def do_resetFile(self, args):
        """Reset file status for the given transformation

           usage: resetFile <transName|ID> <lfns>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.transClient.setFileStatusForTransformation(transName, 'Unused', lfns)
        # An empty 'Failed' dictionary means success, as in resetProcessedFile.
        self._printResetResult(res, len(lfns))

    def do_resetProcessedFile(self, args):
        """ Reset file status for the given transformation

            usage: resetProcessedFile <transName|ID> <lfn>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.transClient.setFileStatusForTransformation(transName, 'Unused', lfns, force=True)
        self._printResetResult(res, len(lfns))

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

           usage: addDirectory <directory> [directory]
        """
        argss = args.split()
        if not argss:
            print("no directory supplied")
            return
        for directory in argss:
            res = self.transClient.addDirectory(directory, force=True)
            if not res['OK']:
                print('failed to add directory %s: %s' % (directory, res['Message']))
            else:
                print('added %s files for %s' % (res['Value'], directory))

    def do_replicas(self, args):
        """ Get replicas for <path>

            usage: replicas <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.transClient.getReplicas(argss)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to get replica information for %s: %s" % (lfn, error))
        for lfn in sorted(res['Value']['Successful']):
            ses = sorted(res['Value']['Successful'][lfn])
            outStr = "%s :" % lfn.ljust(100)
            for se in ses:
                outStr = "%s %s" % (outStr, se.ljust(15))
            print(outStr)

    def do_addFile(self, args):
        """Add new files to transformation DB

           usage: addFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        lfnDict = {}
        for lfn in argss:
            lfnDict[lfn] = {'PFN': 'IGNORED-PFN',
                            'SE': 'IGNORED-SE',
                            'Size': 0,
                            'GUID': 'IGNORED-GUID',
                            'Checksum': 'IGNORED-CHECKSUM'}
        res = self.transClient.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to add %s: %s" % (lfn, error))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB

           usage: removeFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.transClient.removeFile(argss)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to remove %s: %s" % (lfn, error))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

            usage: addReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        lfnDict = {lfn: {'PFN': 'IGNORED-PFN',
                         'SE': se,
                         'Size': 0,
                         'GUID': 'IGNORED-GUID',
                         'Checksum': 'IGNORED-CHECKSUM'}}
        res = self.transClient.addReplica(lfnDict, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to add replica: %s" % (error))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

           usage: removeReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        lfnDict = {lfn: {'PFN': 'IGNORED-PFN',
                         'SE': se,
                         'Size': 0,
                         'GUID': 'IGNORED-GUID',
                         'Checksum': 'IGNORED-CHECKSUM'}}
        res = self.transClient.removeReplica(lfnDict)
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to remove replica: %s" % (error))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

           usage: setReplicaStatus <lfn> <status> <se>
        """
        argss = args.split()
        if len(argss) < 3:
            print("no file info supplied")
            return
        lfn = argss[0]
        status = argss[1]
        se = argss[2]
        lfnDict = {lfn: {'Status': status,
                         'PFN': 'IGNORED-PFN',
                         'SE': se,
                         'Size': 0,
                         'GUID': 'IGNORED-GUID',
                         'Checksum': 'IGNORED-CHECKSUM'}}
        res = self.transClient.setReplicaStatus(lfnDict)
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        for lfn in sorted(res['Value']['Failed']):
            error = res['Value']['Failed'][lfn]
            print("failed to set replica status: %s" % (error))
        for lfn in sorted(res['Value']['Successful']):
            print("updated replica status %s" % lfn)
class InputDataAgent(AgentModule):
    ''' Agent that feeds Active transformations with the files matching their
        input meta query.

        On every cycle, for each selected transformation, the input meta query
        is run against the metadata catalog and the resulting LFNs are added
        to the transformation.
    '''

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''
        AgentModule.__init__(self, *args, **kwargs)

        # transID -> number of LFNs returned by the last catalog query
        self.fileLog = {}
        # transID -> time of the last catalog query
        self.timeLog = {}
        # transID -> reference time used to decide when a full query is due
        self.fullTimeLog = {}

        self.pollingTime = self.am_getOption('PollingTime', 120)
        self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', 86400)
        self.refreshonly = self.am_getOption('RefreshOnly', False)
        self.dateKey = self.am_getOption('DateKey', None)

        self.transClient = TransformationClient()
        self.metadataClient = FileCatalogClient()
        self.transformationTypes = None

    #############################################################################
    def initialize(self):
        ''' Make the necessary initializations: register the monitoring
            activity and work out the transformation types to handle.

            :return: S_OK()
        '''
        gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME,
                                  "Loops/min", gMonitor.OP_SUM)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            dataProc = Operations().getValue('Transformations/DataProcessing',
                                             ['MCSimulation', 'Merge'])
            dataManip = Operations().getValue('Transformations/DataManipulation',
                                              ['Replication', 'Removal'])
            self.transformationTypes = sorted(dataProc + dataManip)
        extendables = Operations().getValue('Transformations/ExtendableTransfTypes', [])
        if extendables:
            for extendable in extendables:
                if extendable in self.transformationTypes:
                    # The Extendables do not use this Agent (have no Input data query)
                    self.transformationTypes.remove(extendable)
        return S_OK()

    ##############################################################################
    def execute(self):
        ''' Main execution method: query the catalog for every Active
            transformation and add the files found.

            Failures are logged per transformation and never abort the cycle.

            :return: S_OK()
        '''
        gMonitor.addMark('Iteration', 1)
        # Get all the transformations
        result = self.transClient.getTransformations({'Status': 'Active',
                                                      'Type': self.transformationTypes})
        if not result['OK']:
            self.log.error("InputDataAgent.execute: Failed to get transformations.",
                           result['Message'])
            return S_OK()

        # Process each transformation
        for transDict in result['Value']:
            # int() instead of the Python-2-only long(); on Python 2 an int
            # is promoted to long automatically when needed.
            transID = int(transDict['TransformationID'])
            res = self.transClient.getTransformationMetaQuery(transID, 'Input')
            if not res['OK']:
                if cmpError(res, ENOENT):
                    self.log.info(
                        "InputDataAgent.execute: No input data query found for transformation",
                        transID)
                else:
                    self.log.error("InputDataAgent.execute: Failed to get input data query",
                                   "for %d: %s" % (transID, res['Message']))
                continue
            inputDataQuery = res['Value']

            if self.refreshonly:
                # Determine the correct time stamp to use for this transformation
                if transID in self.timeLog:
                    if transID in self.fullTimeLog:
                        # If it is more than a day since the last reduced query,
                        # make a full query just in case
                        sinceFull = datetime.datetime.utcnow() - self.fullTimeLog[transID]
                        if sinceFull < datetime.timedelta(seconds=self.fullUpdatePeriod):
                            timeStamp = self.timeLog[transID]
                            if self.dateKey:
                                # Go back 10 seconds from the previous query time
                                inputDataQuery[self.dateKey] = (
                                    timeStamp - datetime.timedelta(seconds=10)
                                ).strftime('%Y-%m-%d %H:%M:%S')
                            else:
                                self.log.error(
                                    "DateKey was not set in the CS, cannot use the RefreshOnly")
                        else:
                            self.fullTimeLog[transID] = datetime.datetime.utcnow()
                self.timeLog[transID] = datetime.datetime.utcnow()
                if transID not in self.fullTimeLog:
                    self.fullTimeLog[transID] = datetime.datetime.utcnow()

            # Perform the query to the metadata catalog
            self.log.verbose("Using input data query for transformation",
                             "%d: %s" % (transID, str(inputDataQuery)))
            start = time.time()
            result = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            self.log.verbose("Metadata catalog query time", ": %.2f seconds." % (rtime))
            if not result['OK']:
                self.log.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    result['Message'])
                continue
            lfnList = result['Value']

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            self.log.info("files returned for transformation from the metadata catalog: ",
                          "%d -> %d" % (int(transID), nlfns))
            if nlfns == self.fileLog.get(transID):
                self.log.verbose('No new files in metadata catalog since last check')
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            addedLfns = []
            if lfnList:
                self.log.verbose('Processing lfns for transformation:',
                                 "%d -> %d" % (transID, len(lfnList)))
                # Add the files to the transformation
                self.log.verbose('Adding lfns for transformation:',
                                 "%d -> %d" % (transID, len(lfnList)))
                result = self.transClient.addFilesToTransformation(transID, sorted(lfnList))
                if not result['OK']:
                    self.log.warn("InputDataAgent.execute: failed to add lfns to transformation",
                                  result['Message'])
                    # Forget the count after a failed addition
                    self.fileLog[transID] = 0
                else:
                    if result['Value']['Failed']:
                        # Iterate the failures of *this* call (result), not the
                        # meta-query answer (res), which has no 'Failed' entry.
                        for lfn, error in result['Value']['Failed'].items():
                            self.log.warn(
                                "InputDataAgent.execute: Failed to add to transformation:",
                                "%s: %s" % (lfn, error))
                    if result['Value']['Successful']:
                        for lfn, status in result['Value']['Successful'].items():
                            if status == 'Added':
                                addedLfns.append(lfn)
                        self.log.info("InputDataAgent.execute: Added files to transformation",
                                      "(%d)" % len(addedLfns))

        return S_OK()
class InputDataAgent(AgentModule):
    """Agent that feeds Active transformations with the files matching their
    input meta query.

    On every cycle, for each selected transformation, the input meta query is
    run against the metadata catalog and the resulting LFNs are added to the
    transformation.
    """

    def __init__(self, *args, **kwargs):
        """c'tor"""
        AgentModule.__init__(self, *args, **kwargs)

        # transID -> number of LFNs returned by the last catalog query
        self.fileLog = {}
        # transID -> time of the last catalog query
        self.timeLog = {}
        # transID -> reference time used to decide when a full query is due
        self.fullTimeLog = {}

        self.pollingTime = self.am_getOption("PollingTime", 120)
        self.fullUpdatePeriod = self.am_getOption("FullUpdatePeriod", 86400)
        self.refreshonly = self.am_getOption("RefreshOnly", False)
        self.dateKey = self.am_getOption("DateKey", None)

        self.transClient = TransformationClient()
        self.metadataClient = FileCatalogClient()
        self.transformationTypes = None

    #############################################################################
    def initialize(self):
        """Make the necessary initializations: work out the transformation
        types handled by this agent.

        :return: S_OK()
        """
        agentTSTypes = self.am_getOption("TransformationTypes", [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            dataProc = Operations().getValue("Transformations/DataProcessing",
                                             ["MCSimulation", "Merge"])
            dataManip = Operations().getValue("Transformations/DataManipulation",
                                              ["Replication", "Removal"])
            self.transformationTypes = sorted(dataProc + dataManip)
        extendables = Operations().getValue("Transformations/ExtendableTransfTypes", [])
        if extendables:
            for extendable in extendables:
                if extendable in self.transformationTypes:
                    # The Extendables do not use this Agent (have no Input data query)
                    self.transformationTypes.remove(extendable)
        return S_OK()

    ##############################################################################
    def execute(self):
        """Main execution method: query the catalog for every Active
        transformation and add the files found.

        Failures are logged per transformation and never abort the cycle.

        :return: S_OK()
        """
        # Get all the transformations
        result = self.transClient.getTransformations({"Status": "Active",
                                                      "Type": self.transformationTypes})
        if not result["OK"]:
            self.log.error("InputDataAgent.execute: Failed to get transformations.",
                           result["Message"])
            return S_OK()

        # Process each transformation
        for transDict in result["Value"]:
            transID = int(transDict["TransformationID"])
            res = self.transClient.getTransformationMetaQuery(transID, "Input")
            if not res["OK"]:
                if cmpError(res, ENOENT):
                    self.log.info(
                        "InputDataAgent.execute: No input data query found for transformation",
                        transID)
                else:
                    self.log.error(
                        "InputDataAgent.execute: Failed to get input data query",
                        "for %d: %s" % (transID, res["Message"]),
                    )
                continue
            inputDataQuery = res["Value"]

            if self.refreshonly:
                # Determine the correct time stamp to use for this transformation
                if transID in self.timeLog:
                    if transID in self.fullTimeLog:
                        # If it is more than a day since the last reduced query,
                        # make a full query just in case
                        sinceFull = datetime.datetime.utcnow() - self.fullTimeLog[transID]
                        if sinceFull < datetime.timedelta(seconds=self.fullUpdatePeriod):
                            timeStamp = self.timeLog[transID]
                            if self.dateKey:
                                # Go back 10 seconds from the previous query time
                                inputDataQuery[self.dateKey] = (
                                    timeStamp - datetime.timedelta(seconds=10)
                                ).strftime("%Y-%m-%d %H:%M:%S")
                            else:
                                self.log.error(
                                    "DateKey was not set in the CS, cannot use the RefreshOnly")
                        else:
                            self.fullTimeLog[transID] = datetime.datetime.utcnow()
                self.timeLog[transID] = datetime.datetime.utcnow()
                if transID not in self.fullTimeLog:
                    self.fullTimeLog[transID] = datetime.datetime.utcnow()

            # Perform the query to the metadata catalog
            self.log.verbose("Using input data query for transformation",
                             "%d: %s" % (transID, str(inputDataQuery)))
            start = time.time()
            result = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            self.log.verbose("Metadata catalog query time", ": %.2f seconds." % (rtime))
            if not result["OK"]:
                self.log.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    result["Message"])
                continue
            lfnList = result["Value"]

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            self.log.info("files returned for transformation from the metadata catalog: ",
                          "%d -> %d" % (int(transID), nlfns))
            if nlfns == self.fileLog.get(transID):
                self.log.verbose("No new files in metadata catalog since last check")
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            addedLfns = []
            if lfnList:
                self.log.verbose("Processing lfns for transformation:",
                                 "%d -> %d" % (transID, len(lfnList)))
                # Add the files to the transformation
                self.log.verbose("Adding lfns for transformation:",
                                 "%d -> %d" % (transID, len(lfnList)))
                result = self.transClient.addFilesToTransformation(transID, sorted(lfnList))
                if not result["OK"]:
                    self.log.warn("InputDataAgent.execute: failed to add lfns to transformation",
                                  result["Message"])
                    # Forget the count after a failed addition
                    self.fileLog[transID] = 0
                else:
                    if result["Value"]["Failed"]:
                        # Iterate the failures of *this* call (result), not the
                        # meta-query answer (res), which has no "Failed" entry.
                        for lfn, error in result["Value"]["Failed"].items():
                            self.log.warn(
                                "InputDataAgent.execute: Failed to add to transformation:",
                                "%s: %s" % (lfn, error))
                    if result["Value"]["Successful"]:
                        for lfn, status in result["Value"]["Successful"].items():
                            if status == "Added":
                                addedLfns.append(lfn)
                        self.log.info("InputDataAgent.execute: Added files to transformation",
                                      "(%d)" % len(addedLfns))

        return S_OK()