Пример #1
0
def main():
  """Report the staging status of one file at one storage element.

  Reads two positional command-line arguments (the LFN and the SE name),
  looks up the matching cache replica through ``StorageManagerClient`` and
  logs the replica details, the IDs of the jobs that requested the staging
  and any SRM stage requests recorded for the replica.

  The script ends through ``DIRACExit``: code 1 when the replica lookup
  fails, code 0 otherwise.
  """
  Script.parseCommandLine(ignoreErrors=True)
  args = Script.getPositionalArgs()
  if len(args) < 2:
    # NOTE(review): showHelp is presumed to terminate the script - confirm.
    Script.showHelp()

  from DIRAC import exit as DIRACExit, gLogger
  from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

  lfn = args[0]
  se = args[1]

  client = StorageManagerClient()
  res = client.getCacheReplicas({'LFN': lfn, 'SE': se})
  if not res['OK']:
    # Stop here: a failed result is not read any further, otherwise the
    # lookup of res['Value'] would fail and hide the real error message.
    gLogger.error(res['Message'])
    DIRACExit(1)

  cacheReplicaInfo = res['Value']
  if cacheReplicaInfo:
    # Only the first matching replica is reported.
    replicaID = list(cacheReplicaInfo)[0]
    replica = cacheReplicaInfo[replicaID]
    outStr = "\n--------------------"
    outStr += "\n%s: %s" % ('LFN'.ljust(8), replica['LFN'].ljust(100))
    outStr += "\n%s: %s" % ('SE'.ljust(8), replica['SE'].ljust(100))
    outStr += "\n%s: %s" % ('PFN'.ljust(8), replica['PFN'].ljust(100))
    outStr += "\n%s: %s" % ('Status'.ljust(8), replica['Status'].ljust(100))
    outStr += "\n%s: %s" % ('LastUpdate'.ljust(8), str(replica['LastUpdate']).ljust(100))
    outStr += "\n%s: %s" % ('Reason'.ljust(8), str(replica['Reason']).ljust(100))

    resTasks = client.getTasks({'ReplicaID': replicaID})
    if resTasks['OK']:
      outStr += '\nJob IDs requesting this file to be staged:'
      for task in resTasks['Value'].values():
        outStr += ' %s ' % (task['SourceTaskID'])

    resStageRequests = client.getStageRequests({'ReplicaID': replicaID})
    if not resStageRequests['OK']:
      # A failed query is reported as an error instead of being read as
      # "no staging requests".
      gLogger.error(resStageRequests['Message'])
    elif resStageRequests['Records']:
      outStr += "\n------SRM staging request info--------------"
      for info in resStageRequests['Value'].values():
        outStr += "\n%s: %s" % ('SRM RequestID'.ljust(8), info['RequestID'].ljust(100))
        outStr += "\n%s: %s" % ('SRM StageStatus'.ljust(8), info['StageStatus'].ljust(100))
        outStr += "\n%s: %s" % ('SRM StageRequestSubmitTime'.ljust(8), str(info['StageRequestSubmitTime']).ljust(100))
        outStr += "\n%s: %s" % ('SRM StageRequestCompletedTime'.ljust(8),
                                str(info['StageRequestCompletedTime']).ljust(100))
        outStr += "\n%s: %s" % ('SRM PinExpiryTime'.ljust(8), str(info['PinExpiryTime']).ljust(100))
        outStr += "\n%s: %s sec" % ('SRM PinLength'.ljust(8), str(info['PinLength']).ljust(100))
    else:
      outStr += '\nThere are no staging requests submitted to the site yet.'
  else:
    outStr = "\nThere is no such file requested for staging. Check for typo's!"
  gLogger.notice(outStr)

  DIRACExit(0)
Пример #2
0
class StageMonitorAgent(AgentModule):
    def initialize(self):
        """Create the storage manager client and select the shifter proxy.

        The 'shifterProxy' option is set to 'DataManager', i.e. the proxy
        defined under /Operations/Shifter/DataManager; the shifterProxy
        option in the Configuration can be used to change this default.

        :return: S_OK()
        """
        self.stagerClient = StorageManagerClient()
        proxyOption, proxyName = 'shifterProxy', 'DataManager'
        self.am_setOption(proxyOption, proxyName)
        return S_OK()

    def execute(self):
        """Run one cycle: read the proxy info, then monitor the stage requests.

        :return: the failed getProxyInfo result, or whatever
                 monitorStageRequests returns
        """
        proxyResult = getProxyInfo(disableVOMS=True)
        if proxyResult['OK']:
            # Kept on the instance: the accounting dict reads the user name.
            self.proxyInfoDict = proxyResult['Value']
            return self.monitorStageRequests()
        return proxyResult

    def monitorStageRequests(self):
        """Third logical task: handle the StageSubmitted->Staged transition.

        Fetches the StageSubmitted replicas grouped by storage element,
        monitors each storage element in turn and commits the accounting
        records collected on the way.

        :return: the lookup result when it failed or was empty, S_OK() otherwise
        """
        lookup = self.__getStageSubmittedReplicas()
        if not lookup['OK']:
            gLogger.fatal(
                "StageMonitor.monitorStageRequests: Failed to get replicas from StorageManagementDB.",
                lookup['Message'])
            return lookup

        found = lookup['Value']
        if not found:
            gLogger.info(
                "StageMonitor.monitorStageRequests: There were no StageSubmitted replicas found"
            )
            return lookup

        replicasBySE = found['SEReplicas']
        allReplicas = found['ReplicaIDs']
        replicaCount = len(allReplicas)
        gLogger.info(
            "StageMonitor.monitorStageRequests: Obtained %s StageSubmitted replicas for monitoring."
            % replicaCount)

        for seName, idsAtSE in replicasBySE.items():
            self.__monitorStorageElementStageRequests(seName, idsAtSE,
                                                      allReplicas)

        gDataStoreClient.commit()
        return S_OK()

    def __monitorStorageElementStageRequests(self, storageElement,
                                             seReplicaIDs, replicaIDs):
        """Poll one storage element and record the state of its stage requests.

        :param storageElement: name of the storage element to query
        :param seReplicaIDs: replica IDs located at this storage element
        :param replicaIDs: dict replicaID -> replica info; 'LFN' is read and
                           'RequestID' is used when present
        :return: None
        """
        logPrefix = "StageMonitor.__monitorStorageElementStageRequests:"
        failedForGood = {}  # replicaID -> reason
        toRetry = []  # only ReplicaIDs
        nowCached = []

        # Since we are in a given SE, the LFN is a unique key
        repIDByLfn = {}
        reqIDByLfn = {}
        for repID in seReplicaIDs:
            replicaInfo = replicaIDs[repID]
            lfn = replicaInfo['LFN']
            repIDByLfn[lfn] = repID
            reqID = replicaInfo.get('RequestID')
            if reqID:
                reqIDByLfn[lfn] = reqID

        gLogger.info("%s Monitoring %s stage requests for %s."
                     % (logPrefix, len(repIDByLfn), storageElement))
        accountingRecord = DataOperation()
        accountingRecord.setStartTime()

        metadataResult = StorageElement(storageElement).getFileMetadata(reqIDByLfn)
        if not metadataResult['OK']:
            gLogger.error(
                "%s Completely failed to monitor stage requests for replicas." % logPrefix,
                metadataResult['Message'])
            return
        prestageStatus = metadataResult['Value']

        accountingDict = self.__newAccountingDict(storageElement)

        for lfn, reason in prestageStatus['Failed'].items():
            accountingDict['TransferTotal'] += 1
            if not re.search('File does not exist', reason):
                continue
            gLogger.error(
                "%s LFN did not exist in the StorageElement" % logPrefix,
                lfn)
            failedForGood[repIDByLfn[lfn]] = 'LFN did not exist in the StorageElement'

        for lfn, metadata in prestageStatus['Successful'].items():
            # Entries without a 'Cached' flag are neither staged nor retried.
            if not metadata or 'Cached' not in metadata:
                continue
            if metadata['Cached']:
                accountingDict['TransferTotal'] += 1
                accountingDict['TransferOK'] += 1
                accountingDict['TransferSize'] += metadata['Size']
                nowCached.append(repIDByLfn[lfn])
            else:
                toRetry.append(repIDByLfn[lfn])

        accountingRecord.setValuesFromDict(accountingDict)
        accountingRecord.setEndTime()
        gDataStoreClient.addRegister(accountingRecord)

        # Update the states of the replicas in the database
        if failedForGood:
            gLogger.info("%s %s replicas are terminally failed."
                         % (logPrefix, len(failedForGood)))
            update = self.stagerClient.updateReplicaFailure(failedForGood)
            if not update['OK']:
                gLogger.error(
                    "%s Failed to update replica failures." % logPrefix,
                    update['Message'])

        if nowCached:
            gLogger.info("%s %s staged replicas to be updated."
                         % (logPrefix, len(nowCached)))
            update = self.stagerClient.setStageComplete(nowCached)
            if not update['OK']:
                gLogger.error(
                    "%s Failed to updated staged replicas." % logPrefix,
                    update['Message'])
            update = self.stagerClient.updateReplicaStatus(nowCached, 'Staged')
            if not update['OK']:
                gLogger.error(
                    "%s Failed to insert replica status." % logPrefix,
                    update['Message'])

        if toRetry:
            gLogger.info("%s %s old requests will be retried."
                         % (logPrefix, len(toRetry)))
            update = self.__wakeupOldRequests(toRetry)
            if not update['OK']:
                gLogger.error(
                    "%s Failed to wakeup old requests." % logPrefix,
                    update['Message'])
        return

    def __newAccountingDict(self, storageElement):
        """Build a fresh accounting dict for a 'Stage' operation.

        :param storageElement: name used for both 'Source' and 'Destination'
        :return: dict with zeroed transfer and registration counters
        """
        return {
            'OperationType': 'Stage',
            'User': self.proxyInfoDict['username'],
            'Protocol': 'Stager',
            'RegistrationTime': 0.0,
            'RegistrationOK': 0,
            'RegistrationTotal': 0,
            'FinalStatus': 'Successful',
            'Source': storageElement,
            'Destination': storageElement,
            'ExecutionSite': siteName(),
            'TransferTotal': 0,
            'TransferOK': 0,
            'TransferSize': 0,
            # The polling time of the agent is reported as the transfer time.
            'TransferTime': self.am_getPollingTime(),
        }

    def __getStageSubmittedReplicas(self):
        """Get the StageSubmitted replicas together with their request IDs.

        The replicas come from getCacheReplicas; the RequestID of each one is
        then added from the result of getStageRequests.

        :return: S_OK({'SEReplicas': {se: [replicaID, ...]},
                       'ReplicaIDs': {replicaID: info}}),
                 S_OK() when there is nothing to process, or the failed /
                 S_ERROR result of the lookup that went wrong
        """
        res = self.stagerClient.getCacheReplicas({'Status': 'StageSubmitted'})
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__getStageSubmittedReplicas: Failed to get replicas with StageSubmitted status.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.debug(
                "StageMonitor.__getStageSubmittedReplicas: No StageSubmitted replicas found to process."
            )
            return S_OK()
        gLogger.debug(
            "StageMonitor.__getStageSubmittedReplicas: Obtained %s StageSubmitted replicas(s) to process."
            % len(res['Value']))

        seReplicas = {}
        replicaIDs = res['Value']
        for replicaID, info in replicaIDs.items():
            # setdefault replaces dict.has_key, which does not exist in Python 3.
            seReplicas.setdefault(info['SE'], []).append(replicaID)

        # A real list is sent and formatted: on Python 3, keys() is a view
        # object rather than a list.
        allReplicaIDs = list(replicaIDs)

        # RequestID was missing from replicaIDs dictionary BUGGY?
        res = self.stagerClient.getStageRequests({'ReplicaID': allReplicaIDs})
        if not res['OK']:
            return res
        if not res['Value']:
            return S_ERROR(
                'Could not obtain request IDs for replicas %s from StageRequests table'
                % (allReplicaIDs))

        for replicaID, info in res['Value'].items():
            replicaIDs[replicaID]['RequestID'] = info['RequestID']

        return S_OK({'SEReplicas': seReplicas, 'ReplicaIDs': replicaIDs})

    def __wakeupOldRequests(self, oldRequests):
        """Ask the stager client to wake up the given old requests.

        :param oldRequests: replica IDs passed on to wakeupOldRequests
        :return: S_OK() on success, the failed client result otherwise
        """
        gLogger.info("StageMonitor.__wakeupOldRequests: Attempting...")
        # 'RetryIntervalHour' option, defaulting to 2.
        intervalHours = self.am_getOption('RetryIntervalHour', 2)
        outcome = self.stagerClient.wakeupOldRequests(oldRequests, intervalHours)
        if outcome['OK']:
            return S_OK()
        gLogger.error(
            "StageMonitor.__wakeupOldRequests: Failed to resubmit old requests.",
            outcome['Message'])
        return outcome
Пример #3
0
                            cacheReplicaInfo[replicaID]['Status'].ljust(100))
    outStr += "\n%s: %s" % ('LastUpdate'.ljust(
        8), str(cacheReplicaInfo[replicaID]['LastUpdate']).ljust(100))
    outStr += "\n%s: %s" % ('Reason'.ljust(
        8), str(cacheReplicaInfo[replicaID]['Reason']).ljust(100))

    resTasks = client.getTasks({'ReplicaID': replicaID})

    if resTasks['OK']:
        # print resTasks['Message']
        outStr += '\nJob IDs requesting this file to be staged:'.ljust(8)
        tasks = resTasks['Value']
        for tid in tasks.keys():
            outStr += ' %s ' % (tasks[tid]['SourceTaskID'])

    resStageRequests = client.getStageRequests({'ReplicaID': replicaID})

    if not resStageRequests['OK']:
        gLogger.error(resStageRequests['Message'])

    if resStageRequests['Records']:
        stageRequests = resStageRequests['Value']
        outStr += "\n------SRM staging request info--------------"
        for info in stageRequests.itervalues():
            outStr += "\n%s: %s" % ('SRM RequestID'.ljust(8),
                                    info['RequestID'].ljust(100))
            outStr += "\n%s: %s" % ('SRM StageStatus'.ljust(8),
                                    info['StageStatus'].ljust(100))
            outStr += "\n%s: %s" % ('SRM StageRequestSubmitTime'.ljust(
                8), str(info['StageRequestSubmitTime']).ljust(100))
            outStr += "\n%s: %s" % ('SRM StageRequestCompletedTime'.ljust(
Пример #4
0
def run():
    """Log a table of the cache replicas selected by the command-line switches.

    Reads the module-level ``switchDict``: ``status`` and ``se`` narrow the
    query, ``limit`` caps the number of entries and ``showJobs`` adds a column
    with the IDs of the jobs that requested each replica. The pin expiry time
    and pin length of each replica's stage requests are appended to its row.
    """
    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient
    client = StorageManagerClient()

    queryDict = {}
    if 'status' in switchDict:
        queryDict['Status'] = str(switchDict['status'])
    if 'se' in switchDict:
        queryDict['SE'] = str(switchDict['se'])

    # Workaround kept from the original author: without any switch the
    # --limit is ignored (to be fixed in StorageManagementDB.py), so a
    # "newer than" date far in the past is passed along with the limit.
    newer = '1903-08-02 06:24:38'  # select newer than
    if 'limit' in switchDict:
        gLogger.notice("Query limited to %s entries" % switchDict['limit'])
        res = client.getCacheReplicas(queryDict, None, newer, None, None,
                                      int(switchDict['limit']))
    else:
        res = client.getCacheReplicas(queryDict)

    if not res['OK']:
        # Stop here: a failed result is not read any further.
        gLogger.error(res['Message'])
        return
    if not res['Records']:
        gLogger.notice("No entries")
        return

    showJobs = 'showJobs' in switchDict

    columns = ["Status".ljust(15), "LastUpdate".ljust(20), "LFN".ljust(80),
               "SE".ljust(10), "Reason".ljust(10)]
    if showJobs:
        columns.append("Jobs".ljust(10))
    columns.append("PinExpiryTime".ljust(15))
    columns.append("PinLength(sec)".ljust(15))
    outStr = "\n" + "".join(" %s" % column for column in columns) + "\n"

    # items()/values() work on Python 2 and 3 (iteritems/itervalues do not).
    for crid, info in res['Value'].items():
        outStr += " %s" % (info['Status'].ljust(15))
        outStr += " %s" % (str(info['LastUpdate']).ljust(20))
        outStr += " %s" % (info['LFN'].ljust(30))
        outStr += " %s" % (info['SE'].ljust(15))
        outStr += " %s" % (str(info['Reason']).ljust(10))

        # Task info
        if showJobs:
            resTasks = client.getTasks({'ReplicaID': crid})
            if not resTasks['OK']:
                outStr += ' %s ' % (" --- ".ljust(10))
            elif resTasks['Value']:
                jobs = [task['SourceTaskID'] for task in resTasks['Value'].values()]
                outStr += ' %s ' % (str(jobs).ljust(10))

        # Stage request info (there may be no request to the site yet)
        resStageRequests = client.getStageRequests({'ReplicaID': crid})
        if not resStageRequests['OK']:
            gLogger.error(resStageRequests['Message'])
        elif resStageRequests['Records']:
            for request in resStageRequests['Value'].values():
                outStr += " %s" % (str(request['PinExpiryTime']).ljust(20))
                outStr += " %s" % (str(request['PinLength']).ljust(10))
        outStr += "\n"

    gLogger.notice(outStr)
  outStr = "%s\n%s: %s" % ( outStr, 'SE'.ljust( 8 ), cacheReplicaInfo[replicaID]['SE'].ljust( 100 ) )
  outStr = "%s\n%s: %s" % ( outStr, 'PFN'.ljust( 8 ), cacheReplicaInfo[replicaID]['PFN'].ljust( 100 ) )
  outStr = "%s\n%s: %s" % ( outStr, 'Status'.ljust( 8 ), cacheReplicaInfo[replicaID]['Status'].ljust( 100 ) )
  outStr = "%s\n%s: %s" % ( outStr, 'LastUpdate'.ljust( 8 ), str(cacheReplicaInfo[replicaID]['LastUpdate']).ljust( 100 ) )
  outStr = "%s\n%s: %s" % ( outStr, 'Reason'.ljust( 8 ), str( cacheReplicaInfo[replicaID]['Reason']).ljust( 100 ) )
  
  resTasks = client.getTasks({'ReplicaID':replicaID})

  if resTasks['OK']:
    #print resTasks['Message']
    outStr = '%s\nJob IDs requesting this file to be staged:'.ljust( 8) % outStr
    tasks = resTasks['Value']
    for tid in tasks.keys():
      outStr = '%s %s ' % (outStr, tasks[tid]['SourceTaskID'])  

  resStageRequests = client.getStageRequests({'ReplicaID':replicaID})

  if not resStageRequests['OK']:
    print resStageRequests['Message']

  if resStageRequests['Records']:
    stageRequests = resStageRequests['Value']
    outStr = "%s\n------SRM staging request info--------------" % outStr
    for srid in stageRequests.keys():
      outStr = "%s\n%s: %s" % ( outStr, 'SRM RequestID'.ljust( 8 ), stageRequests[srid]['RequestID'].ljust( 100 ) )
      outStr = "%s\n%s: %s" % ( outStr, 'SRM StageStatus'.ljust( 8 ), stageRequests[srid]['StageStatus'].ljust( 100 ) )
      outStr = "%s\n%s: %s" % ( outStr, 'SRM StageRequestSubmitTime'.ljust( 8 ), str(stageRequests[srid]['StageRequestSubmitTime']).ljust( 100 ) )
      outStr = "%s\n%s: %s" % ( outStr, 'SRM StageRequestCompletedTime'.ljust( 8 ), str(stageRequests[srid]['StageRequestCompletedTime']).ljust( 100 ) )
      outStr = "%s\n%s: %s" % ( outStr, 'SRM PinExpiryTime'.ljust( 8 ), str(stageRequests[srid]['PinExpiryTime']).ljust( 100 ) )
      outStr = "%s\n%s: %s sec" % ( outStr, 'SRM PinLength'.ljust( 8 ), str(stageRequests[srid]['PinLength']).ljust( 100 ) )
  else:
Пример #6
0
    outStr = "%s\n%s: %s" % (outStr, "SE".ljust(8), cacheReplicaInfo[replicaID]["SE"].ljust(100))
    outStr = "%s\n%s: %s" % (outStr, "PFN".ljust(8), cacheReplicaInfo[replicaID]["PFN"].ljust(100))
    outStr = "%s\n%s: %s" % (outStr, "Status".ljust(8), cacheReplicaInfo[replicaID]["Status"].ljust(100))
    outStr = "%s\n%s: %s" % (outStr, "LastUpdate".ljust(8), str(cacheReplicaInfo[replicaID]["LastUpdate"]).ljust(100))
    outStr = "%s\n%s: %s" % (outStr, "Reason".ljust(8), str(cacheReplicaInfo[replicaID]["Reason"]).ljust(100))

    resTasks = client.getTasks({"ReplicaID": replicaID})

    if resTasks["OK"]:
        # print resTasks['Message']
        outStr = "%s\nJob IDs requesting this file to be staged:".ljust(8) % outStr
        tasks = resTasks["Value"]
        for tid in tasks.keys():
            outStr = "%s %s " % (outStr, tasks[tid]["SourceTaskID"])

    resStageRequests = client.getStageRequests({"ReplicaID": replicaID})

    if not resStageRequests["OK"]:
        print resStageRequests["Message"]

    if resStageRequests["Records"]:
        stageRequests = resStageRequests["Value"]
        outStr = "%s\n------SRM staging request info--------------" % outStr
        for srid in stageRequests.keys():
            outStr = "%s\n%s: %s" % (outStr, "SRM RequestID".ljust(8), stageRequests[srid]["RequestID"].ljust(100))
            outStr = "%s\n%s: %s" % (outStr, "SRM StageStatus".ljust(8), stageRequests[srid]["StageStatus"].ljust(100))
            outStr = "%s\n%s: %s" % (
                outStr,
                "SRM StageRequestSubmitTime".ljust(8),
                str(stageRequests[srid]["StageRequestSubmitTime"]).ljust(100),
            )
def run():
  """Print a table of the cache replicas selected by the command-line switches.

  Reads the module-level ``switchDict``: ``status`` and ``se`` narrow the
  query, ``limit`` caps the number of entries and ``showJobs`` adds a column
  with the IDs of the jobs that requested each replica. The pin expiry time
  and pin length of each replica's stage requests are appended to its row.

  Output is written with single-argument ``print(...)`` calls, which behave
  the same on Python 2 and Python 3.
  """
  from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient
  client = StorageManagerClient()

  queryDict = {}
  if 'status' in switchDict:
    queryDict['Status'] = str(switchDict['status'])
  if 'se' in switchDict:
    queryDict['SE'] = str(switchDict['se'])

  # Workaround kept from the original author: without any switch the
  # --limit is ignored (to be fixed in StorageManagementDB.py), so a
  # "newer than" date far in the past is passed along with the limit.
  newer = '1903-08-02 06:24:38'  # select newer than
  if 'limit' in switchDict:
    print("Query limited to %s entries" % switchDict['limit'])
    res = client.getCacheReplicas(queryDict, None, newer, None, None, int(switchDict['limit']))
  else:
    res = client.getCacheReplicas(queryDict)

  if not res['OK']:
    # Stop here: a failed result is not read any further.
    print(res['Message'])
    return

  if res['Records']:
    showJobs = 'showJobs' in switchDict

    columns = ["Status".ljust(15), "LastUpdate".ljust(20), "LFN".ljust(80),
               "SE".ljust(10), "Reason".ljust(10)]
    if showJobs:
      columns.append("Jobs".ljust(10))
    columns.append("PinExpiryTime".ljust(15))
    columns.append("PinLength(sec)".ljust(15))
    outStr = "\n" + "".join(" %s" % column for column in columns) + "\n"

    for crid, replica in res['Value'].items():
      outStr = "%s %s" % (outStr, replica['Status'].ljust(15))
      outStr = "%s %s" % (outStr, str(replica['LastUpdate']).ljust(20))
      outStr = "%s %s" % (outStr, replica['LFN'].ljust(30))
      outStr = "%s %s" % (outStr, replica['SE'].ljust(15))
      outStr = "%s %s" % (outStr, str(replica['Reason']).ljust(10))

      # Task info
      if showJobs:
        resTasks = client.getTasks({'ReplicaID': crid})
        if not resTasks['OK']:
          outStr = '%s %s ' % (outStr, " --- ".ljust(10))
        elif resTasks['Value']:
          jobs = [task['SourceTaskID'] for task in resTasks['Value'].values()]
          outStr = '%s %s ' % (outStr, str(jobs).ljust(10))

      # Stage request info (there may be no request to the site yet)
      resStageRequests = client.getStageRequests({'ReplicaID': crid})
      if not resStageRequests['OK']:
        print(resStageRequests['Message'])
      elif resStageRequests['Records']:
        for request in resStageRequests['Value'].values():
          outStr = "%s %s" % (outStr, str(request['PinExpiryTime']).ljust(20))
          outStr = "%s %s" % (outStr, str(request['PinLength']).ljust(10))

      outStr = "%s\n" % outStr
    print(outStr)
  else:
    print("No entries")
    def run():
        """Log a table of the cache replicas selected by the command-line switches.

        Reads ``switchDict`` from the enclosing scope: ``status`` and ``se``
        narrow the query, ``limit`` caps the number of entries and
        ``showJobs`` adds a column with the IDs of the jobs that requested
        each replica. The pin expiry time and pin length of each replica's
        stage requests are appended to its row.
        """
        from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

        client = StorageManagerClient()
        queryDict = {}

        if "status" in switchDict:
            queryDict["Status"] = str(switchDict["status"])

        if "se" in switchDict:
            queryDict["SE"] = str(switchDict["se"])

        # Workaround kept from the original author: without any switch the
        # --limit is ignored (to be fixed in StorageManagementDB.py), so a
        # "newer than" date far in the past is passed along with the limit.
        newer = "1903-08-02 06:24:38"  # select newer than
        if "limit" in switchDict:
            gLogger.notice("Query limited to %s entries" % switchDict["limit"])
            res = client.getCacheReplicas(queryDict, None, newer, None, None,
                                          int(switchDict["limit"]))
        else:
            res = client.getCacheReplicas(queryDict)

        if not res["OK"]:
            # Stop here: a failed result is not read any further.
            gLogger.error(res["Message"])
            return

        if not res["Records"]:
            gLogger.notice("No entries")
            return

        withJobs = "showJobs" in switchDict

        outStr = "\n"
        outStr += " %s" % ("Status".ljust(15))
        outStr += " %s" % ("LastUpdate".ljust(20))
        outStr += " %s" % ("LFN".ljust(80))
        outStr += " %s" % ("SE".ljust(10))
        outStr += " %s" % ("Reason".ljust(10))
        if withJobs:
            outStr += " %s" % ("Jobs".ljust(10))
        outStr += " %s" % ("PinExpiryTime".ljust(15))
        outStr += " %s" % ("PinLength(sec)".ljust(15))
        outStr += "\n"

        for crid, replica in res["Value"].items():
            outStr += " %s" % (replica["Status"].ljust(15))
            outStr += " %s" % (str(replica["LastUpdate"]).ljust(20))
            outStr += " %s" % (replica["LFN"].ljust(30))
            outStr += " %s" % (replica["SE"].ljust(15))
            outStr += " %s" % (str(replica["Reason"]).ljust(10))

            # Task info
            if withJobs:
                resTasks = client.getTasks({"ReplicaID": crid})
                if not resTasks["OK"]:
                    outStr += " %s " % (" --- ".ljust(10))
                elif resTasks["Value"]:
                    jobs = [task["SourceTaskID"]
                            for task in resTasks["Value"].values()]
                    outStr += " %s " % (str(jobs).ljust(10))

            # Stage request info (there may be no request to the site yet)
            resStageRequests = client.getStageRequests({"ReplicaID": crid})
            if not resStageRequests["OK"]:
                gLogger.error(resStageRequests["Message"])
            elif resStageRequests["Records"]:
                for request in resStageRequests["Value"].values():
                    outStr += " %s" % (str(request["PinExpiryTime"]).ljust(20))
                    outStr += " %s" % (str(request["PinLength"]).ljust(10))
            outStr += "\n"

        gLogger.notice(outStr)
Пример #9
0
class StageMonitorAgent(AgentModule):
    def initialize(self):
        """Create the clients and read the agent options.

        The "shifterProxy" option is set to "DataManager", i.e. the proxy
        defined under /Operations/Shifter/DataManager; the shifterProxy
        option in the Configuration can be used to change this default.

        :return: S_OK()
        """
        self.stagerClient = StorageManagerClient()
        self.am_setOption("shifterProxy", "DataManager")

        # "StoragePlugins" option, defaulting to an empty list.
        pluginList = self.am_getOption("StoragePlugins", [])
        self.storagePlugins = pluginList

        self.dataOpSender = DataOperationSender()
        return S_OK()

    def execute(self):
        """Run one cycle: read the proxy info, then monitor the stage requests.

        :return: the failed getProxyInfo result, or whatever
                 monitorStageRequests returns
        """
        proxyResult = getProxyInfo(disableVOMS=True)
        if proxyResult["OK"]:
            # Kept on the instance: the accounting dict reads the user name.
            self.proxyInfoDict = proxyResult["Value"]
            return self.monitorStageRequests()
        return proxyResult

    def monitorStageRequests(self):
        """Third logical task: handle the StageSubmitted->Staged transition.

        Fetches the StageSubmitted replicas grouped by storage element and
        monitors each storage element in turn.

        :return: the lookup result when it failed or was empty, otherwise the
                 result of dataOpSender.concludeSending()
        """
        lookup = self.__getStageSubmittedReplicas()
        if not lookup["OK"]:
            gLogger.fatal(
                "StageMonitor.monitorStageRequests: Failed to get replicas from StorageManagementDB.",
                lookup["Message"])
            return lookup

        found = lookup["Value"]
        if not found:
            gLogger.info(
                "StageMonitor.monitorStageRequests: There were no StageSubmitted replicas found"
            )
            return lookup

        replicasBySE = found["SEReplicas"]
        allReplicas = found["ReplicaIDs"]
        replicaCount = len(allReplicas)
        gLogger.info(
            "StageMonitor.monitorStageRequests: Obtained %s StageSubmitted replicas for monitoring."
            % replicaCount)

        for seName, idsAtSE in replicasBySE.items():
            self.__monitorStorageElementStageRequests(seName, idsAtSE,
                                                      allReplicas)

        return self.dataOpSender.concludeSending()

    def __monitorStorageElementStageRequests(self, storageElement,
                                             seReplicaIDs, replicaIDs):
        """Poll one storage element and record the state of its stage requests.

        File metadata is requested for every LFN of the storage element.
        Replicas whose file "does not exist" are marked as failed, replicas
        reported as cached (or, without a "Cached" flag, as accessible) are
        marked as staged, and the remaining ones are handed to
        __wakeupOldRequests for a retry.

        :param storageElement: name of the storage element to query
        :param seReplicaIDs: replica IDs located at this storage element
        :param replicaIDs: dict replicaID -> replica info; "LFN" is read
        :return: None
        """
        logPrefix = "StageMonitor.__monitorStorageElementStageRequests:"
        terminalReplicaIDs = {}
        oldRequests = []
        stagedReplicas = []

        # Since we are in a given SE, the LFN is a unique key
        lfnRepIDs = {}
        for replicaID in seReplicaIDs:
            lfnRepIDs[replicaIDs[replicaID]["LFN"]] = replicaID

        if not lfnRepIDs:
            gLogger.warn("%s No requests to monitor for %s."
                         % (logPrefix, storageElement))
            return
        gLogger.info("%s Monitoring %s stage requests for %s."
                     % (logPrefix, len(lfnRepIDs), storageElement))

        startTime = datetime.datetime.utcnow()
        res = StorageElement(
            storageElement,
            plugins=self.storagePlugins).getFileMetadata(lfnRepIDs)
        if not res["OK"]:
            gLogger.error(
                "%s Completely failed to monitor stage requests for replicas" % logPrefix,
                res["Message"],
            )
            return
        prestageStatus = res["Value"]

        accountingDict = self.__newAccountingDict(storageElement)

        for lfn, reason in prestageStatus["Failed"].items():
            accountingDict["TransferTotal"] += 1
            if re.search("File does not exist", reason):
                gLogger.error(
                    "%s LFN did not exist in the StorageElement" % logPrefix,
                    lfn)
                terminalReplicaIDs[
                    lfnRepIDs[lfn]] = "LFN did not exist in the StorageElement"

        for lfn, metadata in prestageStatus["Successful"].items():
            if not metadata:
                continue
            # "Accessible" is only a fallback. It must not be looked up
            # eagerly as the default of dict.get: that raised KeyError for
            # metadata that carries "Cached" but no "Accessible".
            if "Cached" in metadata:
                staged = metadata["Cached"]
            else:
                staged = metadata.get("Accessible")
            if staged:
                accountingDict["TransferTotal"] += 1
                accountingDict["TransferOK"] += 1
                accountingDict["TransferSize"] += metadata["Size"]
                stagedReplicas.append(lfnRepIDs[lfn])
            elif staged is not None:
                oldRequests.append(lfnRepIDs[lfn])  # only ReplicaIDs

        # Hand the accounting data of this cycle to the data operation sender
        self.dataOpSender.sendData(accountingDict,
                                   startTime=startTime,
                                   endTime=datetime.datetime.utcnow())

        # Update the states of the replicas in the database
        if terminalReplicaIDs:
            gLogger.info("%s %s replicas are terminally failed."
                         % (logPrefix, len(terminalReplicaIDs)))
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res["OK"]:
                gLogger.error(
                    "%s Failed to update replica failures." % logPrefix,
                    res["Message"],
                )
        if stagedReplicas:
            gLogger.info("%s %s staged replicas to be updated."
                         % (logPrefix, len(stagedReplicas)))
            res = self.stagerClient.setStageComplete(stagedReplicas)
            if not res["OK"]:
                gLogger.error(
                    "%s Failed to updated staged replicas." % logPrefix,
                    res["Message"],
                )
            res = self.stagerClient.updateReplicaStatus(
                stagedReplicas, "Staged")
            if not res["OK"]:
                gLogger.error(
                    "%s Failed to insert replica status." % logPrefix,
                    res["Message"],
                )
        if oldRequests:
            gLogger.info("%s %s old requests will be retried."
                         % (logPrefix, len(oldRequests)))
            res = self.__wakeupOldRequests(oldRequests)
            if not res["OK"]:
                gLogger.error(
                    "%s Failed to wakeup old requests." % logPrefix,
                    res["Message"])
        return

    def __newAccountingDict(self, storageElement):
        """Build a fresh accounting dict for a "Stage" operation.

        :param storageElement: name used for both "Source" and "Destination"
        :return: dict with zeroed transfer and registration counters
        """
        return {
            "OperationType": "Stage",
            "User": self.proxyInfoDict["username"],
            "Protocol": "Stager",
            "RegistrationTime": 0.0,
            "RegistrationOK": 0,
            "RegistrationTotal": 0,
            "FinalStatus": "Successful",
            "Source": storageElement,
            "Destination": storageElement,
            "ExecutionSite": siteName(),
            "TransferTotal": 0,
            "TransferOK": 0,
            "TransferSize": 0,
            # The polling time of the agent is reported as the transfer time.
            "TransferTime": self.am_getPollingTime(),
        }

    def __getStageSubmittedReplicas(self):
        """Get the StageSubmitted replicas together with their request IDs.

        The replicas come from getCacheReplicas; the RequestID of each one is
        then added from the result of getStageRequests.

        :return: S_OK({"SEReplicas": {se: [replicaID, ...]},
                       "ReplicaIDs": {replicaID: info}}),
                 S_OK() when there is nothing to process, or the failed /
                 S_ERROR result of the lookup that went wrong
        """
        cacheResult = self.stagerClient.getCacheReplicas({"Status": "StageSubmitted"})
        if not cacheResult["OK"]:
            gLogger.error(
                "StageMonitor.__getStageSubmittedReplicas: Failed to get replicas with StageSubmitted status.",
                cacheResult["Message"],
            )
            return cacheResult

        replicaIDs = cacheResult["Value"]
        if not replicaIDs:
            gLogger.debug(
                "StageMonitor.__getStageSubmittedReplicas: No StageSubmitted replicas found to process."
            )
            return S_OK()
        gLogger.debug(
            "StageMonitor.__getStageSubmittedReplicas: Obtained %s StageSubmitted replicas(s) to process."
            % len(replicaIDs))

        # Group the replica IDs by the storage element that holds them.
        seReplicas = {}
        for repID, replicaInfo in replicaIDs.items():
            seName = replicaInfo["SE"]
            if seName not in seReplicas:
                seReplicas[seName] = []
            seReplicas[seName].append(repID)

        # RequestID was missing from replicaIDs dictionary BUGGY?
        requestResult = self.stagerClient.getStageRequests(
            {"ReplicaID": list(replicaIDs)})
        if not requestResult["OK"]:
            return requestResult
        stageRequests = requestResult["Value"]
        if not stageRequests:
            return S_ERROR(
                "Could not obtain request IDs for replicas %s from StageRequests table"
                % list(replicaIDs))

        for repID, requestInfo in stageRequests.items():
            replicaIDs[repID]["RequestID"] = requestInfo["RequestID"]

        return S_OK({"SEReplicas": seReplicas, "ReplicaIDs": replicaIDs})

    def __wakeupOldRequests(self, oldRequests):
        """Ask the stager client to wake up the given old requests.

        :param oldRequests: replica IDs passed on to wakeupOldRequests
        :return: S_OK() on success, the failed client result otherwise
        """
        gLogger.info("StageMonitor.__wakeupOldRequests: Attempting...")
        # "RetryIntervalHour" option, defaulting to 2.
        intervalHours = self.am_getOption("RetryIntervalHour", 2)
        outcome = self.stagerClient.wakeupOldRequests(oldRequests, intervalHours)
        if outcome["OK"]:
            return S_OK()
        gLogger.error(
            "StageMonitor.__wakeupOldRequests: Failed to resubmit old requests.",
            outcome["Message"])
        return outcome
Пример #10
0
class StageMonitorAgent(AgentModule):
  """Agent that follows up replicas whose status is ``StageSubmitted``.

  On every cycle the agent fetches the StageSubmitted replicas from the stager
  client, asks each storage element for the file metadata of the replicas that
  have a stage request, and then updates the stager: cached replicas are
  marked ``Staged``, files reported as non-existent are marked as failed, and
  replicas that are not cached yet are passed to ``wakeupOldRequests``.
  """

  def initialize(self):
    """Create the clients used by the agent and select the shifter proxy.

    :return: ``S_OK()``
    """
    self.replicaManager = ReplicaManager()
    self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()
    # self.storageDB = StorageManagementDB()
    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption('shifterProxy', 'DataManager')

    return S_OK()

  def execute(self):
    """Run one agent cycle.

    The proxy information is stored on the instance because the accounting
    records built later read the user name from it.

    :return: the failed ``getProxyInfo`` result, or the result of
             :meth:`monitorStageRequests`
    """
    res = getProxyInfo(disableVOMS=True)
    if not res['OK']:
      return res
    self.proxyInfoDict = res['Value']

    return self.monitorStageRequests()

  def monitorStageRequests(self):
    """ This is the third logical task manages the StageSubmitted->Staged transition of the Replicas

    :return: the failed or empty result of the replica lookup, otherwise
             ``S_OK()`` once every storage element has been processed
    """
    res = self.__getStageSubmittedReplicas()
    if not res['OK']:
      gLogger.fatal("StageMonitor.monitorStageRequests: Failed to get replicas from StorageManagementDB.", res['Message'])
      return res
    if not res['Value']:
      gLogger.info("StageMonitor.monitorStageRequests: There were no StageSubmitted replicas found")
      return res
    seReplicas = res['Value']['SEReplicas']
    replicaIDs = res['Value']['ReplicaIDs']
    gLogger.info("StageMonitor.monitorStageRequests: Obtained %s StageSubmitted replicas for monitoring." % len(replicaIDs))
    for storageElement, seReplicaIDs in seReplicas.items():
      self.__monitorStorageElementStageRequests(storageElement, seReplicaIDs, replicaIDs)

    # Flush the accounting records registered while monitoring the storage elements.
    gDataStoreClient.commit()

    return S_OK()

  def __monitorStorageElementStageRequests(self, storageElement, seReplicaIDs, replicaIDs):
    """Check the stage requests of one storage element and update the stager.

    :param storageElement: name of the storage element being monitored
    :param seReplicaIDs: replica IDs located at ``storageElement``
    :param replicaIDs: dict replicaID -> replica info; ``PFN`` is read for every
                       replica and ``RequestID`` when it is present
    :return: None; failures are logged and not propagated
    """
    terminalReplicaIDs = {}
    oldRequests = []
    stagedReplicas = []
    pfnRepIDs = {}
    pfnReqIDs = {}
    for replicaID in seReplicaIDs:
      replicaInfo = replicaIDs[replicaID]
      pfn = replicaInfo['PFN']
      pfnRepIDs[pfn] = replicaID
      # Only replicas that have a stage request ID are queried at the storage element.
      requestID = replicaInfo.get('RequestID')
      if requestID:
        pfnReqIDs[pfn] = requestID

    gLogger.info("StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s." % (len(pfnRepIDs), storageElement))
    oAccounting = DataOperation()
    oAccounting.setStartTime()

    # Pass a real list: on Python 3 dict.keys() is a view object, not a list.
    res = self.replicaManager.getStorageFileMetadata(list(pfnReqIDs), storageElement)
    if not res['OK']:
      gLogger.error("StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.", res['Message'])
      return
    prestageStatus = res['Value']

    accountingDict = self.__newAccountingDict(storageElement)

    for pfn, reason in prestageStatus['Failed'].items():
      accountingDict['TransferTotal'] += 1
      # Only a missing file is treated as a terminal failure; other reasons are just counted.
      if re.search('File does not exist', reason):
        gLogger.error("StageMonitor.__monitorStorageElementStageRequests: PFN did not exist in the StorageElement", pfn)
        terminalReplicaIDs[pfnRepIDs[pfn]] = 'PFN did not exist in the StorageElement'
    for pfn, staged in prestageStatus['Successful'].items():
      # Metadata without a 'Cached' entry gives no information about staging; skip it.
      if not staged or 'Cached' not in staged:
        continue
      if staged['Cached']:
        accountingDict['TransferTotal'] += 1
        accountingDict['TransferOK'] += 1
        accountingDict['TransferSize'] += staged['Size']
        stagedReplicas.append(pfnRepIDs[pfn])
      else:
        oldRequests.append(pfnRepIDs[pfn])  # only ReplicaIDs

    oAccounting.setValuesFromDict(accountingDict)
    oAccounting.setEndTime()
    gDataStoreClient.addRegister(oAccounting)

    # Update the states of the replicas in the database
    if terminalReplicaIDs:
      gLogger.info("StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed." % len(terminalReplicaIDs))
      res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
      if not res['OK']:
        gLogger.error("StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.", res['Message'])
    if stagedReplicas:
      gLogger.info("StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated." % len(stagedReplicas))
      res = self.stagerClient.setStageComplete(stagedReplicas)
      if not res['OK']:
        gLogger.error("StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.", res['Message'])
      res = self.stagerClient.updateReplicaStatus(stagedReplicas, 'Staged')
      if not res['OK']:
        gLogger.error("StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.", res['Message'])
    if oldRequests:
      gLogger.info("StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried." % len(oldRequests))
      res = self.__wakeupOldRequests(oldRequests)
      if not res['OK']:
        gLogger.error("StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.", res['Message'])
    return

  def __newAccountingDict(self, storageElement):
    """ Generate a new accounting Dict

    :param storageElement: used as both ``Source`` and ``Destination``
    :return: dict with zeroed transfer/registration counters; ``User`` comes from
             the proxy info stored by :meth:`execute`
    """
    return {
        'OperationType': 'Stage',
        'User': self.proxyInfoDict['username'],
        'Protocol': 'Stager',
        'RegistrationTime': 0.0,
        'RegistrationOK': 0,
        'RegistrationTotal': 0,
        'FinalStatus': 'Successful',
        'Source': storageElement,
        'Destination': storageElement,
        'ExecutionSite': siteName(),
        'TransferTotal': 0,
        'TransferOK': 0,
        'TransferSize': 0,
        'TransferTime': self.am_getPollingTime(),
    }

  def __getStageSubmittedReplicas(self):
    """ This obtains the StageSubmitted replicas from the Replicas table and the RequestID from the StageRequests table

    :return: a failed result, an empty ``S_OK()`` when there is nothing to
             process, or ``S_OK({'SEReplicas': ..., 'ReplicaIDs': ...})`` where
             ``SEReplicas`` maps a storage element to its replica IDs and
             ``ReplicaIDs`` maps a replica ID to its info dict
    """
    res = self.stagerClient.getCacheReplicas({'Status': 'StageSubmitted'})
    if not res['OK']:
      gLogger.error("StageMonitor.__getStageSubmittedReplicas: Failed to get replicas with StageSubmitted status.", res['Message'])
      return res
    if not res['Value']:
      gLogger.debug("StageMonitor.__getStageSubmittedReplicas: No StageSubmitted replicas found to process.")
      return S_OK()
    gLogger.debug("StageMonitor.__getStageSubmittedReplicas: Obtained %s StageSubmitted replicas(s) to process." % len(res['Value']))

    # Group the replica IDs by storage element.
    # setdefault replaces dict.has_key(), which no longer exists on Python 3.
    seReplicas = {}
    replicaIDs = res['Value']
    for replicaID, info in replicaIDs.items():
      seReplicas.setdefault(info['SE'], []).append(replicaID)

    # RequestID was missing from replicaIDs dictionary BUGGY?
    # list(...) keeps the argument a plain list on both Python 2 and Python 3.
    res = self.stagerClient.getStageRequests({'ReplicaID': list(replicaIDs)})
    if not res['OK']:
      return res
    if not res['Value']:
      return S_ERROR('Could not obtain request IDs for replicas %s from StageRequests table' % list(replicaIDs))

    for replicaID, info in res['Value'].items():
      replicaIDs[replicaID]['RequestID'] = info['RequestID']

    return S_OK({'SEReplicas': seReplicas, 'ReplicaIDs': replicaIDs})

  def __reportProblematicFiles(self, lfns, reason):
    """Report files as problematic to the data integrity client.

    NOTE(review): the unconditional ``return S_OK()`` below makes the rest of
    this method unreachable, so nothing is reported at present. It is left
    untouched here; confirm whether that is intended.

    :param lfns: LFNs to report
    :param reason: reason passed to ``setFileProblematic``
    :return: ``S_OK()``
    """
    return S_OK()
    res = self.dataIntegrityClient.setFileProblematic(lfns, reason, sourceComponent='StageMonitorAgent')
    if not res['OK']:
      gLogger.error("StageMonitor.__reportProblematicFiles: Failed to report missing files.", res['Message'])
      return res
    if res['Value']['Successful']:
      gLogger.info("StageMonitor.__reportProblematicFiles: Successfully reported %s missing files." % len(res['Value']['Successful']))
    if res['Value']['Failed']:
      gLogger.info("StageMonitor.__reportProblematicFiles: Failed to report %s problematic files." % len(res['Value']['Failed']))
    return res

  def __wakeupOldRequests(self, oldRequests):
    """Pass replica IDs that are not cached yet to the stager's wakeupOldRequests call.

    :param oldRequests: list of replica IDs
    :return: ``S_OK()`` on success, otherwise the failed client result
    """
    gLogger.info("StageMonitor.__wakeupOldRequests: Attempting...")
    retryInterval = self.am_getOption('RetryIntervalHour', 2)
    res = self.stagerClient.wakeupOldRequests(oldRequests, retryInterval)
    if not res['OK']:
      gLogger.error("StageMonitor.__wakeupOldRequests: Failed to resubmit old requests.", res['Message'])
      return res
    return S_OK()
Пример #11
0
def main():
    """Print the staging information known for one LFN at one storage element.

    The two positional arguments (LFN and SE) are used to look up the cache
    replica; for the first replica found, the replica details, the IDs of the
    tasks that requested it and the stage request details are written with
    ``gLogger.notice``. The script exits with 0 after reporting and with 1 when
    a client call fails.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("LFN: LFN of the staging file")
    Script.registerArgument("SE: Storage Element for the staging file")
    Script.parseCommandLine(ignoreErrors=True)

    from DIRAC import exit as DIRACExit, gLogger

    lfn, se = Script.getPositionalArgs(group=True)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getCacheReplicas({"LFN": lfn, "SE": se})
    if not res["OK"]:
        # Stop here: carrying on would read res["Value"] from a failed call.
        gLogger.error(res["Message"])
        DIRACExit(1)
        return
    cacheReplicaInfo = res["Value"]
    if cacheReplicaInfo:
        # Only the first replica returned is reported.
        replicaID = list(cacheReplicaInfo)[0]
        replica = cacheReplicaInfo[replicaID]
        outStr = "\n--------------------"
        outStr += "\n%s: %s" % ("LFN".ljust(8), replica["LFN"].ljust(100))
        outStr += "\n%s: %s" % ("SE".ljust(8), replica["SE"].ljust(100))
        outStr += "\n%s: %s" % ("PFN".ljust(8), replica["PFN"].ljust(100))
        outStr += "\n%s: %s" % ("Status".ljust(8), replica["Status"].ljust(100))
        outStr += "\n%s: %s" % ("LastUpdate".ljust(8), str(replica["LastUpdate"]).ljust(100))
        outStr += "\n%s: %s" % ("Reason".ljust(8), str(replica["Reason"]).ljust(100))

        resTasks = client.getTasks({"ReplicaID": replicaID})

        if resTasks["OK"]:
            outStr += "\nJob IDs requesting this file to be staged:"
            for task in resTasks["Value"].values():
                outStr += " %s " % (task["SourceTaskID"])

        resStageRequests = client.getStageRequests({"ReplicaID": replicaID})

        if not resStageRequests["OK"]:
            # Show what was gathered so far, then stop instead of reading
            # further keys from a failed result.
            gLogger.error(resStageRequests["Message"])
            gLogger.notice(outStr)
            DIRACExit(1)
            return

        if resStageRequests["Records"]:
            stageRequests = resStageRequests["Value"]
            outStr += "\n------SRM staging request info--------------"
            for info in stageRequests.values():
                outStr += "\n%s: %s" % ("SRM RequestID".ljust(8), info["RequestID"].ljust(100))
                outStr += "\n%s: %s" % ("SRM StageStatus".ljust(8), info["StageStatus"].ljust(100))
                outStr += "\n%s: %s" % (
                    "SRM StageRequestSubmitTime".ljust(8),
                    str(info["StageRequestSubmitTime"]).ljust(100),
                )
                outStr += "\n%s: %s" % (
                    "SRM StageRequestCompletedTime".ljust(8),
                    str(info["StageRequestCompletedTime"]).ljust(100),
                )
                outStr += "\n%s: %s" % ("SRM PinExpiryTime".ljust(8), str(info["PinExpiryTime"]).ljust(100))
                outStr += "\n%s: %s sec" % ("SRM PinLength".ljust(8), str(info["PinLength"]).ljust(100))
        else:
            outStr += "\nThere are no staging requests submitted to the site yet."
    else:
        outStr = "\nThere is no such file requested for staging. Check for typo's!"
        # Script.showHelp()
    gLogger.notice(outStr)

    DIRACExit(0)