Exemplo n.º 1
0
  def test_ParametricChain(self):
    """ Submit a parametric job and follow the 3 jobs it should generate.

        The generated jobs are checked by name, set to 'Done', deleted and
        finally expected to be reported as 'Deleted'.
    """
    wmsClient = WMSClient()
    jobStateUpdate = JobStateUpdateClient()
    jobMonitor = JobMonitoringClient()

    # build the parametric job together with its XML description
    parametric = parametricJob()
    xmlDescription = createFile(parametric)

    # the submission has to return exactly three job IDs
    res = wmsClient.submitJob(parametric._toJDL(xmlFile=xmlDescription))
    self.assertTrue(res['OK'])
    jobIDList = res['Value']
    self.assertEqual(len(jobIDList), 3)

    # each generated job is named after its parameter index
    res = jobMonitor.getJobsParameters(jobIDList, ['JobName'])
    self.assertTrue(res['OK'])
    foundNames = set(params['JobName'] for params in res['Value'].values())
    expectedNames = set('parametric_helloWorld_%s' % nJob for nJob in range(3))
    self.assertEqual(foundNames, expectedNames)

    for jid in jobIDList:
      res = jobStateUpdate.setJobStatus(jid, 'Done', 'matching', 'source')
      self.assertTrue(res['OK'])

    res = wmsClient.deleteJob(jobIDList)
    self.assertTrue(res['OK'])

    # after deletion every job must be reported as 'Deleted'
    for jid in jobIDList:
      res = jobMonitor.getJobStatus(jid)
      self.assertTrue(res['OK'])
      self.assertEqual(res['Value'], 'Deleted')
Exemplo n.º 2
0
    def __report(self, jobID, status, minorStatus):
        """Report a job status through the JobStateUpdate client.

        :param jobID: job identifier, converted with int() for the call
        :param status: major status to set
        :param minorStatus: minor status to set
        :return: the result structure returned by setJobStatus
        """
        source = 'JobAgent@%s' % self.siteName
        result = JobStateUpdateClient().setJobStatus(int(jobID), status,
                                                     minorStatus, source)
        self.log.verbose(
            'Setting job status',
            'setJobStatus(%s,%s,%s,%s)' % (jobID, status, minorStatus, source))
        if result['OK']:
            return result

        # the failure is only logged; the caller still gets the result
        self.log.warn('Issue setting the job status', result['Message'])
        return result
Exemplo n.º 3
0
    def __setJobParam(self, jobID, name, value):
        """Set a single job parameter through the JobStateUpdate client.

        :param jobID: job identifier, passed to the client as int
        :param name: parameter name, passed to the client as str
        :param value: parameter value, passed to the client as str
        :return: the result structure returned by setJobParameter
        """
        result = JobStateUpdateClient().setJobParameter(
            int(jobID), str(name), str(value))
        call = 'setJobParameter(%s,%s,%s)' % (jobID, name, value)
        self.log.verbose('Setting job parameter', call)
        if not result['OK']:
            # the failure is only logged; the caller still gets the result
            self.log.warn('Issue setting the job parameter', result['Message'])
        return result
Exemplo n.º 4
0
    def __setJobParamList(self, value):
        """Send a list of job parameters through the JobStateUpdate client.

        The job ID is read from the JOBID environment variable; when it is
        not set nothing is reported and S_OK() is returned.

        :param value: parameters handed over unchanged to setJobParameters
        :return: S_OK() without JOBID, otherwise the setJobParameters result
        """
        try:
            # the job wrapper template sets the JOBID variable
            jobID = os.environ["JOBID"]
        except KeyError:
            self.log.info("Running without JOBID so parameters will not be reported")
            return S_OK()

        result = JobStateUpdateClient().setJobParameters(int(jobID), value)
        self.log.verbose("setJobParameters(%s,%s)" % (jobID, value))
        if not result["OK"]:
            self.log.warn(result["Message"])
        return result
Exemplo n.º 5
0
    def __sendSignOfLife(self, jobID, heartBeatDict, staticParamDict):
        """Send the heartbeat and act on whatever the service sends back.

        A failed call is logged as a warning; a successful call with a
        non-empty value is forwarded to the control signal interpretation.

        :return: the result structure returned by sendHeartBeat
        """
        result = JobStateUpdateClient().sendHeartBeat(jobID, heartBeatDict, staticParamDict)

        if not result["OK"]:
            self.log.warn("Problem sending sign of life")
            self.log.warn(result)
        elif result["Value"]:
            # a non-empty value is handed over as control signal
            self.__interpretControlSignal(result["Value"])

        return result
Exemplo n.º 6
0
    def test_ParametricChain(self):
        """Submit a parametric job and check the 3 jobs it should generate.

        The generated jobs are checked by name, set to CHECKING, deleted and
        finally expected to be reported as DELETED.
        """
        wmsClient = WMSClient()
        jobStateUpdate = JobStateUpdateClient()
        jobMonitor = JobMonitoringClient()

        # build the parametric job together with its XML description
        parametric = parametricJob()
        xmlDescription = createFile(parametric)

        # the submission has to return exactly three job IDs
        result = wmsClient.submitJob(parametric._toJDL(xmlFile=xmlDescription))
        self.assertTrue(result["OK"], result.get("Message"))
        jobIDList = result["Value"]
        self.assertEqual(len(jobIDList), 3, msg="Got %s" % str(jobIDList))

        # each generated job is named after its parameter index
        result = jobMonitor.getJobsParameters(jobIDList, ["JobName"])
        self.assertTrue(result["OK"], result.get("Message"))
        foundNames = set(params["JobName"] for params in result["Value"].values())
        expectedNames = set("parametric_helloWorld_%s" % nJob for nJob in range(3))
        self.assertEqual(foundNames, expectedNames)

        for jid in jobIDList:
            result = jobStateUpdate.setJobStatus(jid, JobStatus.CHECKING, "checking", "source")
            self.assertTrue(result["OK"], result.get("Message"))

        result = wmsClient.deleteJob(jobIDList)
        self.assertTrue(result["OK"], result.get("Message"))
        print(result)

        # after deletion every job must be reported as DELETED
        for jid in jobIDList:
            result = jobMonitor.getJobsStatus(jid)
            self.assertTrue(result["OK"], result.get("Message"))
            statusInfo = result["Value"]
            self.assertEqual(statusInfo[jid]["Status"], JobStatus.DELETED, msg="Got %s" % str(statusInfo))
Exemplo n.º 7
0
    def test_ParametricChain(self):
        """ Submit a parametric job and follow the 3 jobs it should generate.

            The generated jobs are checked by name, set to 'Done', deleted
            and finally expected to be reported as 'Deleted'.
        """
        wmsClient = WMSClient()
        jobStateUpdate = JobStateUpdateClient()
        jobMonitor = JobMonitoringClient()

        # build the parametric job together with its XML description
        parametric = parametricJob()
        xmlDescription = createFile(parametric)

        # the submission has to return exactly three job IDs
        res = wmsClient.submitJob(parametric._toJDL(xmlFile=xmlDescription))
        self.assertTrue(res['OK'])
        jobIDList = res['Value']
        self.assertEqual(len(jobIDList), 3)

        # each generated job is named after its parameter index
        res = jobMonitor.getJobsParameters(jobIDList, ['JobName'])
        self.assertTrue(res['OK'])
        foundNames = set(params['JobName'] for params in res['Value'].values())
        expectedNames = set('parametric_helloWorld_%s' % nJob for nJob in range(3))
        self.assertEqual(foundNames, expectedNames)

        for jid in jobIDList:
            res = jobStateUpdate.setJobStatus(jid, 'Done', 'matching', 'source')
            self.assertTrue(res['OK'])

        res = wmsClient.deleteJob(jobIDList)
        self.assertTrue(res['OK'])

        # after deletion every job must be reported as 'Deleted'
        for jid in jobIDList:
            res = jobMonitor.getJobStatus(jid)
            self.assertTrue(res['OK'])
            self.assertEqual(res['Value'], 'Deleted')
Exemplo n.º 8
0
  def __setJobParamList(self, value):
    """ Send a list of job parameters through the JobStateUpdate client.

        The job ID is read from the JOBID environment variable; when it is
        not set nothing is reported and S_OK() is returned.

        :param value: parameters handed over unchanged to setJobParameters
        :return: S_OK() without JOBID, otherwise the setJobParameters result
    """
    try:
      # the job wrapper template sets the JOBID variable
      jobID = os.environ['JOBID']
    except KeyError:
      self.log.info('Running without JOBID so parameters will not be reported')
      return S_OK()

    result = JobStateUpdateClient().setJobParameters(int(jobID), value)
    self.log.verbose('setJobParameters(%s,%s)' % (jobID, value))
    if not result['OK']:
      self.log.warn(result['Message'])
    return result
Exemplo n.º 9
0
    def __setJobParam(self, name, value):
        """Set one parameter of the current job through the JobStateUpdate client.

        :param name: parameter name, passed to the client as str
        :param value: parameter value, passed to the client as str
        :return: S_ERROR when self.jobID is not set, otherwise the
                 setJobParameter result
        """
        if not self.jobID:
            return S_ERROR("JobID not defined")

        call = "setJobParameter(%s,%s,%s)" % (self.jobID, name, value)
        self.log.verbose("setting job parameters", call)

        result = JobStateUpdateClient().setJobParameter(int(self.jobID), str(name), str(value))
        if not result["OK"]:
            # the failure is only logged; the caller still gets the result
            self.log.warn("Failed to set job parameters", result["Message"])
        return result
Exemplo n.º 10
0
    def __setJobParam(self, name, value):
        """Set one parameter of the current job through the JobStateUpdate client.

        :param name: parameter name, passed to the client as str
        :param value: parameter value, passed to the client as str
        :return: S_ERROR when self.jobID is not set, otherwise the
                 setJobParameter result
        """
        if not self.jobID:
            return S_ERROR('JobID not defined')

        result = JobStateUpdateClient().setJobParameter(int(self.jobID), str(name), str(value))
        call = 'setJobParameter(%s,%s,%s)' % (self.jobID, name, value)
        self.log.verbose(call)
        if not result['OK']:
            # the failure is only logged; the caller still gets the result
            self.log.warn(result['Message'])
        return result
Exemplo n.º 11
0
 def __init__(self, transformationID, transInfoDict, enabled,
              tClient, fcClient, jobMon):
   """Keep the transformation description and the clients used to act on it.

   :param transformationID: stored as ``tID`` and used in the sub-logger name
   :param transInfoDict: dict providing 'TransformationName', 'Type',
                         'AuthorDN' and 'AuthorGroup'
   :param enabled: stored as ``enabled``
   :param tClient: stored as ``tClient``
   :param fcClient: stored as ``fcClient``
   :param jobMon: stored as ``jobMon``
   """
   loggerName = __name__ + "[%s]" % transformationID
   self.log = gLogger.getSubLogger(loggerName)

   # identity and switch
   self.tID = transformationID
   self.enabled = enabled

   # clients handed over by the caller
   self.tClient = tClient
   self.fcClient = fcClient
   self.jobMon = jobMon

   # fields copied from the transformation description
   self.transName = transInfoDict['TransformationName']
   self.transType = transInfoDict['Type']
   self.authorDN = transInfoDict['AuthorDN']
   self.authorGroup = transInfoDict['AuthorGroup']

   # this client is created here instead of being passed in
   self.jobStateClient = JobStateUpdateClient()
Exemplo n.º 12
0
  def __call__(self):
    """ Call back the job named in the operation arguments.

        On success the operation status is set to "Done" and S_OK() is
        returned; on failure the error is logged and the failed result is
        returned as is.
    """
    # the operation arguments hold the job ID
    jobID = self.operation.Arguments
    self.log.info("Performing callback to job %s" % jobID)

    callback = JobStateUpdateClient().updateJobFromStager(jobID, 'Done')
    if callback['OK']:
      self.operation.Status = "Done"
      self.log.info("Callback from staging done")
      return S_OK()

    self.log.error("Error performing the callback to the job", callback)
    return callback
Exemplo n.º 13
0
    def sendStoredJobParameters(self):
        """ Send the job parameters kept in the internal cache.

            Only the first element of each cached value is sent. The cache
            is emptied when the service call succeeds.

            :return: S_OK('Empty') when nothing is cached, otherwise the
                     setJobParameters result
        """
        parameters = []
        for pname, value in self.jobParameters.items():
            parameters.append([pname, value[0]])

        if not parameters:
            return S_OK('Empty')

        result = JobStateUpdateClient().setJobParameters(self.jobID, parameters)
        if result['OK']:
            # sent successfully: empty the internal parameter container
            self.jobParameters = {}
        return result
Exemplo n.º 14
0
    def test_matcher(self):
        """ Submit a job, put it in a task queue and request it from the Matcher.
        """
        # insert a proper DN to run the test
        ownerDN = '/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser/[email protected]'
        site = 'DIRAC.Jenkins.ch'
        cpuTime = 86400

        # description of the resource asking for a job
        resourceDescription = {
            'OwnerGroup': 'prod',
            'OwnerDN': ownerDN,
            'DIRACVersion': 'pippo',
            'ReleaseVersion': 'blabla',
            'VirtualOrganization': 'LHCb',
            'PilotInfoReportedFlag': 'True',
            'PilotBenchmark': 'anotherPilot',
            'Site': site,
            'CPUTime': cpuTime
        }
        wmsClient = WMSClient()

        # submit a user job bound to the test site
        job = helloWorldJob()
        job.setDestination(site)
        job.setInputData('/a/bbb')
        job.setType('User')
        xmlDescription = createFile(job)
        result = wmsClient.submitJob(job._toJDL(xmlFile=xmlDescription))
        self.assertTrue(result['OK'])

        jobID = result['Value']

        result = JobStateUpdateClient().setJobStatus(jobID, 'Waiting', 'matching', 'source')
        self.assertTrue(result['OK'])

        # insert the job in a task queue defined by the same owner
        tqDB = TaskQueueDB()
        tqDefDict = {
            'OwnerDN': ownerDN,
            'OwnerGroup': 'prod',
            'Setup': 'dirac-JenkinsSetup',
            'CPUTime': cpuTime
        }
        result = tqDB.insertJob(jobID, tqDefDict, 10)
        self.assertTrue(result['OK'])

        result = MatcherClient().requestJob(resourceDescription)
        print(result)
        self.assertTrue(result['OK'])
        wmsClient.deleteJob(jobID)
Exemplo n.º 15
0
    def sendStoredJobParameters(self):
        """ Send the job parameters kept in the internal cache.

            Each cached value is unpacked as (value, timestamp) and only the
            value is sent. The cache is emptied when the call succeeds.

            :return: S_OK('Empty') when nothing is cached, otherwise the
                     setJobParameters result
        """
        parameters = [[pname, pvalue]
                      for pname, (pvalue, _timeStamp) in self.jobParameters.items()]

        if not parameters:
            return S_OK('Empty')

        result = JobStateUpdateClient().setJobParameters(self.jobID, parameters)
        if result['OK']:
            # sent successfully: empty the internal parameter container
            self.jobParameters = {}
        return result
Exemplo n.º 16
0
    def test_matcher(self):
        """Submit a job, put it in a task queue and request it from the Matcher."""
        # insert a proper DN to run the test
        ownerDN = "/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser"
        site = "DIRAC.Jenkins.ch"
        cpuTime = 86400

        # description of the resource asking for a job
        resourceDescription = {
            "OwnerGroup": "prod",
            "OwnerDN": ownerDN,
            "DIRACVersion": "pippo",
            "GridCE": "some.grid.ce.org",
            "ReleaseVersion": "blabla",
            "VirtualOrganization": "LHCb",
            "PilotInfoReportedFlag": "True",
            "PilotBenchmark": "anotherPilot",
            "Site": site,
            "CPUTime": cpuTime,
        }
        wmsClient = WMSClient()

        # submit a user job bound to the test site
        job = helloWorldJob()
        job.setDestination(site)
        job.setInputData("/a/bbb")
        job.setType("User")
        xmlDescription = createFile(job)
        result = wmsClient.submitJob(job._toJDL(xmlFile=xmlDescription))
        self.assertTrue(result["OK"], result.get("Message"))

        jobID = result["Value"]

        # forcing the update
        result = JobStateUpdateClient().setJobStatus(jobID, JobStatus.WAITING, "matching", "source", None, True)
        self.assertTrue(result["OK"], result.get("Message"))

        # insert the job in a task queue defined by the same owner
        tqDB = TaskQueueDB()
        tqDefDict = {
            "OwnerDN": ownerDN,
            "OwnerGroup": "prod",
            "Setup": "dirac-JenkinsSetup",
            "CPUTime": cpuTime,
        }
        result = tqDB.insertJob(jobID, tqDefDict, 10)
        self.assertTrue(result["OK"], result.get("Message"))

        result = MatcherClient().requestJob(resourceDescription)
        print(result)
        self.assertTrue(result["OK"], result.get("Message"))
        wmsClient.deleteJob(jobID)
Exemplo n.º 17
0
  def test_FullChain(self):
    """ Drive one job through the WMS client calls and check its status.

        - submit a hello-world job with the WMSClient
        - reset, reschedule, kill and delete it, changing its status in
          between with the JobStateUpdateClient
        - use the JobMonitoringClient to verify the status after each step

        The final deleteJob call only sets the job status to 'Deleted'.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job: the ID is returned both as 'Value' and as 'JobID'
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertIsInstance(res['Value'], int)
    self.assertEqual(res['Value'], res['JobID'])
    jobID = res['Value']

    # updating the status (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source')

    # reset the job
    res = wmsClient.resetJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))

    # reschedule the job
    res = wmsClient.rescheduleJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Received', msg="Got %s" % str(res['Value']))

    # updating the status again (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Killed', msg="Got %s" % str(res['Value']))

    # updating the status once more (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Done', 'matching', 'source')

    # kill the job: this time it won't kill... it's done!
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Done', msg="Got %s" % str(res['Value']))

    # delete the job - this will just set its status to "deleted"
    res = wmsClient.deleteJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Deleted', msg="Got %s" % str(res['Value']))
Exemplo n.º 18
0
  def test_JobStateUpdateAndJobMonitoringMultuple(self):
    """ # Now, let's submit some jobs. Different sites, types, inputs
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    jobIDs = []
    lfnss = [['/a/1.txt', '/a/2.txt'], ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
    types = ['User', 'Test']
    for lfns in lfnss:
      for jobType in types:
        job = helloWorldJob()
        job.setDestination('DIRAC.Jenkins.ch')
        job.setInputData(lfns)
        job.setType(jobType)
        jobDescription = createFile(job)
        res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
        self.assertTrue(res['OK'])
        jobID = res['Value']
      jobIDs.append(jobID)

    res = jobMonitor.getSites()
    self.assertTrue(res['OK'])
    self.assertTrue(set(res['Value']) <= {'ANY', 'DIRAC.Jenkins.ch'})
    res = jobMonitor.getJobTypes()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(types))
    res = jobMonitor.getApplicationStates()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(['Unknown']))

    res = jobMonitor.getOwners()
    self.assertTrue(res['OK'])
    res = jobMonitor.getOwnerGroup()
    self.assertTrue(res['OK'])
    res = jobMonitor.getProductionIds()
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobGroups()
    self.assertTrue(res['OK'])
    res = jobMonitor.getStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Received'], sorted(['Received', 'Waiting'])])
    res = jobMonitor.getMinorStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Job accepted'], sorted(['Job accepted', 'Job Rescheduled'])])
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobs()
    self.assertTrue(res['OK'])
    self.assertTrue(set([str(x) for x in jobIDs]) <= set(res['Value']))
#     res = jobMonitor.getCounters(attrList)
#     self.assertTrue(res['OK'])
    res = jobMonitor.getCurrentJobCounters()
    self.assertTrue(res['OK'])
    try:
      self.assertTrue(
          res['Value'].get('Received') +
          res['Value'].get('Waiting') >= long(
              len(lfnss) *
              len(types)))
    except TypeError:
      pass
    res = jobMonitor.getJobsSummary(jobIDs)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
    self.assertTrue(res['OK'])

    res = jobStateUpdate.setJobStatusBulk(jobID,
                                          {str(datetime.datetime.utcnow()): {'Status': 'Running',
                                                                             'MinorStatus': 'MinorStatus',
                                                                             'ApplicationStatus': 'ApplicationStatus',
                                                                             'Source': 'Unknown'}})
    self.assertTrue(res['OK'])
    res = jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']})
    self.assertTrue(res['OK'])

    # delete the jobs - this will just set its status to "deleted"
    wmsClient.deleteJob(jobIDs)
Exemplo n.º 19
0
  def test_JobStateUpdateAndJobMonitoring(self):
    """ Exercise the JobStateUpdate and JobMonitoring clients on a single job.

        A hello-world job is submitted, then status, parameters, application
        status and site are set through the JobStateUpdateClient, and the
        JobMonitoringClient is used to read them back. The job is deleted
        at the end.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create a job and check stuff
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submitting the job. Checking few stuff
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'])
    jobID = int(res['Value'])
    # jobID = res['JobID']
    res = jobMonitor.getJobJDL(jobID, True)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobJDL(jobID, False)
    self.assertTrue(res['OK'])
    # with an empty list of parameter names an empty dict is expected
    res = jobMonitor.getJobsParameters([jobID], [])
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {})
    res = jobMonitor.getJobsParameters([jobID], ['Owner'])
    self.assertTrue(res['OK'])

    # Adding stuff
    res = jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')
    self.assertTrue(res['OK'])
    res = jobStateUpdate.setJobParameters(jobID, [('par1', 'par1Value'), ('par2', 'par2Value')])
    self.assertTrue(res['OK'])
    res = jobStateUpdate.setJobApplicationStatus(jobID, 'app status', 'source')
    self.assertTrue(res['OK'])
#     res = jobStateUpdate.setJobFlag()
#     self.assertTrue(res['OK'])
#     res = jobStateUpdate.unsetJobFlag()
#     self.assertTrue(res['OK'])
    res = jobStateUpdate.setJobSite(jobID, 'Site')
    self.assertTrue(res['OK'])
#     res = jobMonitor.traceJobParameter( 'Site', 1, 'Status' )
#     self.assertTrue(res['OK'])

    # now checking few things
    # NOTE(review): the status was set to 'Matched' above, yet 'Running' is
    # expected here - presumably one of the later calls moves the job to
    # 'Running' on the service side; confirm
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Running')
    # the parameters set above must be read back unchanged
    res = jobMonitor.getJobParameter(jobID, 'par1')
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {'par1': 'par1Value'})
    res = jobMonitor.getJobParameters(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {'par1': 'par1Value', 'par2': 'par2Value'})
    res = jobMonitor.getJobAttribute(jobID, 'Site')
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Site')
    res = jobMonitor.getJobAttributes(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')
    self.assertEqual(res['Value']['JobName'], 'helloWorld')
    res = jobMonitor.getJobSummary(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')
    self.assertEqual(res['Value']['Status'], 'Running')
    # no heartbeat was sent so far and the job has no input data
    res = jobMonitor.getJobHeartBeatData(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], [])
    res = jobMonitor.getInputData(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], [])
    res = jobMonitor.getJobPrimarySummary(jobID)
    self.assertTrue(res['OK'])
    res = jobMonitor.getAtticJobParameters(jobID)
    self.assertTrue(res['OK'])
    # status change through the multi-job call: the summary must show the new
    # status and minor status, with the application status left untouched
    res = jobStateUpdate.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown')
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobSummary(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value']['Status'], 'Done')
    self.assertEqual(res['Value']['MinorStatus'], 'MinorStatus')
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')
    res = jobStateUpdate.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'})
    self.assertTrue(res['OK'])

    # delete the job - this will just set its status to "deleted"
    wmsClient.deleteJob(jobID)
Exemplo n.º 20
0
    def test_JobStateUpdateAndJobMonitoring(self):
        """Exercise the JobStateUpdate and JobMonitoring clients on a single job.

        A hello-world job is submitted, then status, parameters, application
        status and site are set through the JobStateUpdateClient, and the
        JobMonitoringClient is used to read them back. The job is deleted
        at the end.
        """
        wmsClient = WMSClient()
        jobMonitor = JobMonitoringClient()
        jobStateUpdate = JobStateUpdateClient()

        # create a job and check stuff
        job = helloWorldJob()
        jobDescription = createFile(job)

        # submitting the job. Checking few stuff
        res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
        self.assertTrue(res["OK"], res.get("Message"))
        jobID = int(res["Value"])
        # jobID = res['JobID']
        res = jobMonitor.getJobJDL(jobID, True)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobJDL(jobID, False)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobsParameters([jobID], [])
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobOwner(jobID)
        self.assertTrue(res["OK"], res.get("Message"))

        # Adding stuff

        # forcing the update
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING, "running",
                                          "source", None, True)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobStateUpdate.setJobParameters(jobID, [("par1", "par1Value"),
                                                      ("par2", "par2Value")])
        # NOTE(review): presumably gives the service time to store the
        # parameters before they are read back below - confirm
        time.sleep(5)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobStateUpdate.setJobApplicationStatus(jobID, "app status",
                                                     "source")
        self.assertTrue(res["OK"], res.get("Message"))
        #     res = jobStateUpdate.setJobFlag()
        #     self.assertTrue(res['OK'], res.get('Message'))
        #     res = jobStateUpdate.unsetJobFlag()
        #     self.assertTrue(res['OK'], res.get('Message'))
        res = jobStateUpdate.setJobSite(jobID, "Site")
        self.assertTrue(res["OK"], res.get("Message"))

        # now checking few things
        # the status is looked up under the job ID in the returned value
        res = jobMonitor.getJobsStatus(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"][jobID]["Status"],
                         JobStatus.RUNNING,
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobParameter(jobID, "par1")
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"], {"par1": "par1Value"},
                         msg="Got %s" % str(res["Value"]))
        # getJobParameters is expected to key its result by job ID
        res = jobMonitor.getJobParameters(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"],
                         {jobID: {
                             "par1": "par1Value",
                             "par2": "par2Value"
                         }},
                         msg="Got %s" % str(res["Value"]))
        # same call restricted to a single parameter name
        res = jobMonitor.getJobParameters(jobID, "par1")
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"], {jobID: {
            "par1": "par1Value"
        }},
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobAttribute(jobID, "Site")
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"],
                         "Site",
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobAttributes(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["ApplicationStatus"],
                         "app status",
                         msg="Got %s" % str(res["Value"]["ApplicationStatus"]))
        self.assertEqual(res["Value"]["JobName"],
                         "helloWorld",
                         msg="Got %s" % str(res["Value"]["JobName"]))
        res = jobMonitor.getJobSummary(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["ApplicationStatus"],
                         "app status",
                         msg="Got %s" % str(res["Value"]["ApplicationStatus"]))
        self.assertEqual(res["Value"]["Status"],
                         JobStatus.RUNNING,
                         msg="Got %s" % str(res["Value"]["Status"]))
        # no heartbeat was sent so far and the job has no input data
        res = jobMonitor.getJobHeartBeatData(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"], [], msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getInputData(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"], [], msg="Got %s" % str(res["Value"]))
        # NOTE(review): getJobSummary was already checked above; here only
        # the success of the call is verified
        res = jobMonitor.getJobSummary(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getAtticJobParameters(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        # after the change to DONE the summary must show the new status and
        # minor status, with the application status left untouched
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.DONE, "MinorStatus",
                                          "Unknown")
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobSummary(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["Status"],
                         JobStatus.DONE,
                         msg="Got %s" % str(res["Value"]["Status"]))
        self.assertEqual(res["Value"]["MinorStatus"],
                         "MinorStatus",
                         msg="Got %s" % str(res["Value"]["MinorStatus"]))
        self.assertEqual(res["Value"]["ApplicationStatus"],
                         "app status",
                         msg="Got %s" % str(res["Value"]["ApplicationStatus"]))
        res = jobStateUpdate.sendHeartBeat(jobID, {"bih": "bih"},
                                           {"boh": "boh"})
        self.assertTrue(res["OK"], res.get("Message"))

        # delete the job - this will just set its status to "deleted"
        wmsClient.deleteJob(jobID)
Exemplo n.º 21
0
  def test_FullChain(self):
    """ Drive one job through the WMS client calls and check its status.

        - submit a hello-world job with the WMSClient
        - reset, reschedule, kill and delete it, changing its status in
          between with the JobStateUpdateClient
        - use the JobMonitoringClient to verify the status after each step

        The final deleteJob call only sets the job status to 'Deleted'.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job: the ID is returned both as 'Value' and as 'JobID'
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertIsInstance(res['Value'], int)
    self.assertEqual(res['Value'], res['JobID'])
    jobID = res['Value']

    # updating the status (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source')

    # reset the job
    res = wmsClient.resetJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))

    # reschedule the job
    res = wmsClient.rescheduleJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Received', msg="Got %s" % str(res['Value']))

    # updating the status again (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Killed', msg="Got %s" % str(res['Value']))

    # updating the status once more (the outcome of this call is not checked)
    jobStateUpdate.setJobStatus(jobID, 'Done', 'matching', 'source')

    # kill the job: this time it won't kill... it's done!
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Done', msg="Got %s" % str(res['Value']))

    # delete the job - this will just set its status to "deleted"
    res = wmsClient.deleteJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Deleted', msg="Got %s" % str(res['Value']))
Exemplo n.º 22
0
    def test_JobStateUpdateAndJobMonitoringMultuple(self):
        """Submit a batch of jobs and exercise the bulk monitoring/state-update calls.

        One job is submitted for every combination of input-data list and job
        type.  The JobMonitoring "distinct values" and summary queries are then
        checked, and the last submitted job is driven through several status
        changes with JobStateUpdate.  All submitted jobs are deleted at the end.
        """
        wmsClient = WMSClient()
        jobMonitor = JobMonitoringClient()
        jobStateUpdate = JobStateUpdateClient()

        jobIDs = []
        lfnss = [["/a/1.txt", "/a/2.txt"],
                 ["/a/1.txt", "/a/3.txt", "/a/4.txt"], []]
        types = ["User", "Test"]
        for lfns in lfnss:
            for jobType in types:
                job = helloWorldJob()
                job.setDestination("DIRAC.Jenkins.ch")
                job.setInputData(lfns)
                job.setType(jobType)
                jobDescription = createFile(job)
                res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
                self.assertTrue(res["OK"], res.get("Message"))
                jobID = res["Value"]
                # Record every submitted job, not just the last one of each
                # input list, so all of them are checked and deleted below.
                jobIDs.append(jobID)

        # "distinct values" queries
        res = jobMonitor.getSites()
        print(res)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertTrue(
            set(res["Value"]) <= {"ANY", "DIRAC.Jenkins.ch", "Site"},
            msg="Got %s" % res["Value"])
        res = jobMonitor.getJobTypes()
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(sorted(res["Value"]),
                         sorted(types),
                         msg="Got %s" % str(sorted(res["Value"])))
        res = jobMonitor.getApplicationStates()
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"], ["app status", "Unknown"],
                         msg="Got %s" % str(res["Value"]))

        res = jobMonitor.getOwners()
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getOwnerGroup()
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getProductionIds()
        self.assertTrue(res["OK"], res.get("Message"))

        # job groups: no cut-off, cut-off "now", cut-off one year ago
        res = jobMonitor.getJobGroups()
        self.assertTrue(res["OK"], res.get("Message"))
        resJG_empty = res["Value"]
        res = jobMonitor.getJobGroups(None, datetime.datetime.utcnow())
        self.assertTrue(res["OK"], res.get("Message"))
        resJG_olderThanNow = res["Value"]
        self.assertEqual(resJG_empty, resJG_olderThanNow)
        res = jobMonitor.getJobGroups(
            None,
            datetime.datetime.utcnow() - datetime.timedelta(days=365))
        self.assertTrue(res["OK"], res.get("Message"))
        resJG_olderThanOneYear = res["Value"]
        self.assertTrue(
            set(resJG_olderThanOneYear).issubset(set(resJG_olderThanNow)),
            resJG_olderThanOneYear)

        res = jobMonitor.getStates()
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertTrue(
            sorted(res["Value"])
            in [[JobStatus.RECEIVED],
                sorted([JobStatus.RECEIVED, JobStatus.KILLED])], res["Value"])
        res = jobMonitor.getMinorStates()
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertTrue(
            sorted(res["Value"]) in [
                ["Job accepted"],
                sorted(["Job accepted", "Job Rescheduled"]),
                sorted(["Job accepted", "Marked for termination"]),
            ],
            res["Value"],
        )
        res = jobMonitor.getJobs()
        self.assertTrue(res["OK"], res.get("Message"))
        # getJobs values are compared against the stringified job IDs
        self.assertTrue(
            set(str(x) for x in jobIDs) <= set(res["Value"]), res["Value"])
        res = jobMonitor.getJobsSummary(jobIDs)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
        self.assertTrue(res["OK"], res.get("Message"))

        # From here on only the last submitted job (jobID) is updated.
        res = jobStateUpdate.setJobStatusBulk(
            jobID,
            {
                str(datetime.datetime.utcnow()): {
                    "Status": JobStatus.CHECKING,
                    "MinorStatus": "MinorStatus",
                    "Source": "Unknown",
                }
            },
            False,
        )
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobSummary(int(jobID))
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["Status"], JobStatus.CHECKING)
        self.assertEqual(res["Value"]["MinorStatus"], "MinorStatus")

        # Two updates in one call; the summary is expected to show the one
        # carrying the later timestamp.
        res = jobStateUpdate.setJobStatusBulk(
            jobID,
            {
                str(datetime.datetime.utcnow() + datetime.timedelta(hours=1)):
                {
                    "Status": JobStatus.WAITING,
                    "MinorStatus": "MinorStatus",
                    "Source": "Unknown",
                },
                str(datetime.datetime.utcnow() + datetime.timedelta(hours=2)):
                {
                    "Status": JobStatus.MATCHED,
                    "MinorStatus": "MinorStatus-matched",
                    "Source": "Unknown",
                },
            },
            False,
        )
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobSummary(int(jobID))
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["Status"], JobStatus.MATCHED)
        self.assertEqual(res["Value"]["MinorStatus"], "MinorStatus-matched")

        # Setting a job parameter is expected to leave the status untouched.
        res = jobStateUpdate.setJobsParameter({jobID: ["Whatever", "booh"]})
        self.assertTrue(res["OK"], res.get("Message"))

        res = jobMonitor.getJobSummary(int(jobID))
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["Status"], JobStatus.MATCHED)
        self.assertEqual(res["Value"]["MinorStatus"], "MinorStatus-matched")

        res = jobStateUpdate.setJobAttribute(jobID, "Status",
                                             JobStatus.RUNNING)
        self.assertTrue(res["OK"], res.get("Message"))

        res = jobMonitor.getJobSummary(int(jobID))
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"]["Status"], JobStatus.RUNNING)

        # delete the jobs - this will just set their status to "deleted"
        wmsClient.deleteJob(jobIDs)
Exemplo n.º 23
0
  def test_JobStateUpdateAndJobMonitoring(self):
    """ Submit one hello-world job, update it through JobStateUpdate and read the
        changes back through JobMonitoring.
    """
    wms = WMSClient()
    monitoring = JobMonitoringClient()
    stateUpdate = JobStateUpdateClient()

    def checked(result):
      """ Assert that the call succeeded and hand the result back """
      self.assertTrue(result['OK'])
      return result

    # create and submit a job
    job = helloWorldJob()
    jobDescription = createFile(job)
    jobID = int(checked(wms.submitJob(job._toJDL(xmlFile=jobDescription)))['Value'])

    # freshly submitted job: JDLs are retrievable, no parameters selected
    checked(monitoring.getJobJDL(jobID, True))
    checked(monitoring.getJobJDL(jobID, False))
    self.assertEqual(checked(monitoring.getJobsParameters([jobID], []))['Value'], {})
    checked(monitoring.getJobsParameters([jobID], ['Owner']))

    # update status, parameters, application status and site
    # (setJobFlag / unsetJobFlag / traceJobParameter are not exercised here)
    checked(stateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source'))
    checked(stateUpdate.setJobParameters(jobID, [('par1', 'par1Value'), ('par2', 'par2Value')]))
    checked(stateUpdate.setJobApplicationStatus(jobID, 'app status', 'source'))
    checked(stateUpdate.setJobSite(jobID, 'Site'))

    # read everything back
    self.assertEqual(checked(monitoring.getJobStatus(jobID))['Value'], 'Running')
    self.assertEqual(checked(monitoring.getJobParameter(jobID, 'par1'))['Value'],
                     {'par1': 'par1Value'})
    self.assertEqual(checked(monitoring.getJobParameters(jobID))['Value'],
                     {jobID: {'par1': 'par1Value', 'par2': 'par2Value'}})
    self.assertEqual(checked(monitoring.getJobParameters(jobID, 'par1'))['Value'],
                     {jobID: {'par1': 'par1Value'}})
    self.assertEqual(checked(monitoring.getJobAttribute(jobID, 'Site'))['Value'], 'Site')

    attributes = checked(monitoring.getJobAttributes(jobID))['Value']
    self.assertEqual(attributes['ApplicationStatus'], 'app status')
    self.assertEqual(attributes['JobName'], 'helloWorld')

    summary = checked(monitoring.getJobSummary(jobID))['Value']
    self.assertEqual(summary['ApplicationStatus'], 'app status')
    self.assertEqual(summary['Status'], 'Running')

    self.assertEqual(checked(monitoring.getJobHeartBeatData(jobID))['Value'], [])
    self.assertEqual(checked(monitoring.getInputData(jobID))['Value'], [])
    checked(monitoring.getJobPrimarySummary(jobID))
    checked(monitoring.getAtticJobParameters(jobID))

    # bulk status update, then check the summary again
    checked(stateUpdate.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown'))
    summary = checked(monitoring.getJobSummary(jobID))['Value']
    self.assertEqual(summary['Status'], 'Done')
    self.assertEqual(summary['MinorStatus'], 'MinorStatus')
    self.assertEqual(summary['ApplicationStatus'], 'app status')
    checked(stateUpdate.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'}))

    # delete the job - this will just set its status to "deleted"
    wms.deleteJob(jobID)
Exemplo n.º 24
0
class TransformationInfo(object):
  """Hold information about a transformation.

  Bundles the transformation, file-catalog, job-monitoring and job-state
  clients with the transformation's metadata, and offers helpers to inspect and
  update its tasks, jobs, input files and output files.  When ``enabled`` is
  False the public status setters and :meth:`cleanOutputs` modify nothing.
  """

  def __init__(self, transformationID, transInfoDict, enabled,
               tClient, fcClient, jobMon):
    """Store clients etc.

    :param transformationID: ID of the transformation
    :param dict transInfoDict: must contain 'TransformationName', 'Type',
                               'AuthorDN' and 'AuthorGroup'
    :param bool enabled: if False, no status is changed and no file is removed
    :param tClient: client used for transformation files and task status
    :param fcClient: client used to look up file descendants
    :param jobMon: client used to look up jobs
    """
    self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
    self.enabled = enabled
    self.tID = transformationID
    self.transName = transInfoDict['TransformationName']
    self.tClient = tClient
    self.jobMon = jobMon
    self.fcClient = fcClient
    self.transType = transInfoDict['Type']
    self.authorDN = transInfoDict['AuthorDN']
    self.authorGroup = transInfoDict['AuthorGroup']
    self.jobStateClient = JobStateUpdateClient()

  def checkTasksStatus(self):
    """Return the file information of this transformation, keyed by task ID.

    :returns: dict mapping TaskID to a dict with FileID, LFN, Status, ErrorCount
    :raises RuntimeError: if the transformation files cannot be retrieved
    """
    res = self.tClient.getTransformationFiles(condDict={'TransformationID': self.tID})
    if not res['OK']:
      raise RuntimeError("Failed to get transformation tasks: %s" % res['Message'])

    tasksDict = {}
    for task in res['Value']:
      # One entry per task ID: a later row with the same TaskID replaces the earlier one.
      tasksDict[task['TaskID']] = dict(FileID=task['FileID'],
                                       LFN=task['LFN'],
                                       Status=task['Status'],
                                       ErrorCount=task['ErrorCount'])
    return tasksDict

  def setJobDone(self, job):
    """Set the task of the job to Done and, if needed, the job itself as well."""
    if not self.enabled:
      return
    self.__setTaskStatus(job, 'Done')
    if job.status != 'Done':
      self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

  def setJobFailed(self, job):
    """Set the task of the job to Failed and, if needed, the job itself as well."""
    if not self.enabled:
      return
    self.__setTaskStatus(job, 'Failed')
    if job.status != 'Failed':
      self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

  def setInputUnused(self, job):
    """Set the input file of the job to Unused."""
    self.__setInputStatus(job, "Unused")

  def setInputMaxReset(self, job):
    """Set the input file of the job to MaxReset."""
    self.__setInputStatus(job, "MaxReset")

  def setInputProcessed(self, job):
    """Set the input file of the job to Processed."""
    self.__setInputStatus(job, "Processed")

  def setInputDeleted(self, job):
    """Set the input file of the job to Deleted."""
    self.__setInputStatus(job, "Deleted")

  def __setInputStatus(self, job, status):
    """Set the input file of the job to status; does nothing when disabled.

    :raises RuntimeError: if the file status cannot be updated
    """
    if not self.enabled:
      return
    result = self.tClient.setFileStatusForTransformation(self.tID, status, [job.inputFile], force=True)
    if not result['OK']:
      # Use the per-transformation logger, like the rest of the class.
      self.log.error("Failed updating status", result['Message'])
      raise RuntimeError("Failed updating file status")

  def __setTaskStatus(self, job, status):
    """Update the task of the job in the TransformationDB.

    :raises RuntimeError: if the task status cannot be updated
    """
    res = self.tClient.setTaskStatus(self.transName, job.taskID, status)
    if not res['OK']:
      raise RuntimeError("Failed updating task status: %s" % res['Message'])

  def __updateJobStatus(self, jobID, status, minorstatus=''):
    """Update the job status.

    :returns: the result of setJobStatus, or S_OK('DisabledMode') when disabled
    :raises RuntimeError: if the job status cannot be updated
    """
    if not self.enabled:
      return S_OK('DisabledMode')
    source = 'DataRecoveryAgent'
    result = self.jobStateClient.setJobStatus(jobID, status, minorstatus, source)
    if not result['OK']:
      self.log.error('Failed to update job status', result['Message'])
      raise RuntimeError('Failed to update job status')
    return result

  def __findAllDescendants(self, lfnList):
    """Return all descendants (depths 1 to 7) of a list of LFNs.

    An empty list is returned if the catalog query fails.
    """
    allDescendants = []
    result = self.fcClient.getFileDescendents(lfnList, range(1, 8))
    if not result['OK']:
      return allDescendants
    for descendants in result['Value']['Successful'].values():
      allDescendants.extend(descendants)
    return allDescendants

  def cleanOutputs(self, jobInfo):
    """Remove the existing output files of the job and all their descendants.

    When disabled, only log which files would have been removed.

    :raises RuntimeError: if no proxy can be obtained or the removal call fails
    """
    if not jobInfo.outputFiles:
      return
    descendants = self.__findAllDescendants(jobInfo.outputFiles)
    existingOutputFiles = [lfn for lfn, status in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus)
                           if status == "Exists"]
    filesToDelete = existingOutputFiles + descendants

    if not filesToDelete:
      return

    if not self.enabled:
      self.log.notice("Would have removed these files: \n +++ %s " % "\n +++ ".join(filesToDelete))
      return
    self.log.notice("Remove these files: \n +++ %s " % "\n +++ ".join(filesToDelete))

    errorReasons = defaultdict(list)
    successfullyRemoved = 0

    # Remove in chunks of 200 LFNs, acting with the proxy of the transformation author.
    for lfnList in breakListIntoChunks(filesToDelete, 200):
      with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
        if not proxyResult['OK']:
          raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
        result = DataManager().removeFile(lfnList)
        if not result['OK']:
          self.log.error("Failed to remove LFNs", result['Message'])
          raise RuntimeError("Failed to remove LFNs: %s" % result['Message'])
        # Group the per-file failures by reason, to log one line per reason below.
        for lfn, err in result['Value']['Failed'].items():
          errorReasons[str(err)].append(lfn)
        successfullyRemoved += len(result['Value']['Successful'])
    for reason, lfns in errorReasons.items():
      self.log.error("Failed to remove %d files with error: %s" % (len(lfns), reason))
    self.log.notice("Successfully removed %d files" % successfullyRemoved)

  def getJobs(self, statusList=None):
    """Get the Done and/or Failed jobs of this transformation.

    :param list statusList: statuses to look up, defaults to ['Done', 'Failed']
    :returns: tuple of (OrderedDict of jobID to JobInfo sorted by jobID,
              number of Done jobs, number of Failed jobs)
    """
    done = S_OK([])
    failed = S_OK([])
    if statusList is None:
      statusList = ['Done', 'Failed']
    if 'Done' in statusList:
      self.log.notice("Getting 'Done' Jobs...")
      done = self.__getJobs(["Done"])
    if 'Failed' in statusList:
      self.log.notice("Getting 'Failed' Jobs...")
      failed = self.__getJobs(["Failed"])
    done = done['Value']
    failed = failed['Value']

    jobsUnsorted = {}
    for job in done:
      jobsUnsorted[int(job)] = JobInfo(job, "Done", self.tID, self.transType)
    for job in failed:
      jobsUnsorted[int(job)] = JobInfo(job, "Failed", self.tID, self.transType)
    jobs = OrderedDict(sorted(jobsUnsorted.items(), key=lambda t: t[0]))

    self.log.notice("Found %d Done Jobs " % len(done))
    self.log.notice("Found %d Failed Jobs " % len(failed))
    return jobs, len(done), len(failed)

  def __getJobs(self, status):
    """Return the jobs of this transformation that are in the given status.

    Jobs are selected by JobGroup, which is the zero-padded transformation ID.

    :raises RuntimeError: if the job query fails
    """
    attrDict = dict(Status=status, JobGroup="%08d" % int(self.tID))
    res = self.jobMon.getJobs(attrDict)
    if not res['OK']:
      self.log.error("Error finding jobs: ", res['Message'])
      raise RuntimeError("Failed to get jobs")
    self.log.debug("Found Prod jobs: %s" % res['Value'])
    return res
Exemplo n.º 25
0
  def test_JobStateUpdateAndJobMonitoringMultuple(self):
    """ Submit a batch of jobs (different types and inputs) and exercise the bulk
        JobMonitoring and JobStateUpdate calls. All submitted jobs are deleted at the end.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    jobIDs = []
    lfnss = [['/a/1.txt', '/a/2.txt'], ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
    types = ['User', 'Test']
    for lfns in lfnss:
      for jobType in types:
        job = helloWorldJob()
        job.setDestination('DIRAC.Jenkins.ch')
        job.setInputData(lfns)
        job.setType(jobType)
        jobDescription = createFile(job)
        res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
        self.assertTrue(res['OK'])
        jobID = res['Value']
        # Record every submitted job, not just the last one of each input list,
        # so all of them are checked and deleted below.
        jobIDs.append(jobID)

    res = jobMonitor.getSites()
    # single-argument call form: prints the same under Python 2 and Python 3
    print(res)
    self.assertTrue(res['OK'])
    self.assertTrue(set(res['Value']) <= {'ANY', 'DIRAC.Jenkins.ch'})
    res = jobMonitor.getJobTypes()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(types))
    res = jobMonitor.getApplicationStates()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(['Unknown']))

    res = jobMonitor.getOwners()
    self.assertTrue(res['OK'])
    res = jobMonitor.getOwnerGroup()
    self.assertTrue(res['OK'])
    res = jobMonitor.getProductionIds()
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobGroups()
    self.assertTrue(res['OK'])
    res = jobMonitor.getStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Received'], sorted(['Received', 'Waiting'])])
    res = jobMonitor.getMinorStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Job accepted'], sorted(['Job accepted', 'Job Rescheduled'])])
    res = jobMonitor.getJobs()
    self.assertTrue(res['OK'])
    self.assertTrue(set(str(x) for x in jobIDs) <= set(res['Value']))
    res = jobMonitor.getCurrentJobCounters()
    self.assertTrue(res['OK'])
    try:
      self.assertTrue(
          res['Value'].get('Received') +
          res['Value'].get('Waiting') >= len(lfnss) * len(types))
    except TypeError:
      # Deliberate best effort: one of the two counters is missing (None),
      # so the sum cannot be computed and the check is skipped.
      pass
    res = jobMonitor.getJobsSummary(jobIDs)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
    self.assertTrue(res['OK'])

    # From here on only the last submitted job (jobID) is updated.
    res = jobStateUpdate.setJobStatusBulk(jobID,
                                          {str(datetime.datetime.utcnow()): {'Status': 'Running',
                                                                             'MinorStatus': 'MinorStatus',
                                                                             'ApplicationStatus': 'ApplicationStatus',
                                                                             'Source': 'Unknown'}})
    self.assertTrue(res['OK'])
    res = jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']})
    self.assertTrue(res['OK'])

    # delete the jobs - this will just set their status to "deleted"
    wmsClient.deleteJob(jobIDs)
Exemplo n.º 26
0
    def finalizeRequest(self, requestID, jobID, useCertificates=True):
        """Check the request status and finalize the associated job if necessary.

        Once the request is 'Done', the job's "PendingRequest" parameter is
        refreshed with the request digest and the job minor status is set to
        ``JobMinorStatus.REQUESTS_DONE``.  A new major status is also set when
        the job is Completed (derived from its minor status) or Stalled (the
        status preceding the stall is restored).

        :param self: self reference
        :param str requestID: request id
        :param int jobID: job id (anything accepted by ``int()``)
        :param bool useCertificates: passed on to the JobStateUpdate and
                                     JobMonitoring clients
        :returns: S_OK(new job status, possibly an empty string), S_OK() when
                  the job does not exist (anymore), or the failing result /
                  S_ERROR
        """

        stateServer = JobStateUpdateClient(useCertificates=useCertificates)

        # Checking if to update the job status - we should fail here, so it will be re-tried later
        # Checking the state, first
        res = self.getRequestStatus(requestID)
        if not res["OK"]:
            self.log.error(
                "finalizeRequest: failed to get request",
                "request: %s status: %s" % (requestID, res["Message"]))
            return res
        if res["Value"] != "Done":
            return S_ERROR(
                "The request %s isn't 'Done' but '%s', this should never happen, why are we here?"
                % (requestID, res["Value"]))

        # The request is 'Done', let's update the job status. If we fail, we should re-try later

        monitorServer = JobMonitoringClient(useCertificates=useCertificates)
        # Normalise once: jobID is used with "%d" and sent to the state server
        # below, which would otherwise fail for a job id given as a string.
        jobID = int(jobID)
        res = monitorServer.getJobSummary(jobID)
        if not res["OK"]:
            self.log.error("finalizeRequest: Failed to get job status",
                           "JobID: %d" % jobID)
            return res
        elif not res["Value"]:
            self.log.info(
                "finalizeRequest: job %d does not exist (anymore): finalizing"
                % jobID)
            return S_OK()
        else:
            jobStatus = res["Value"]["Status"]
            jobMinorStatus = res["Value"]["MinorStatus"]
            jobAppStatus = ""
            newJobStatus = ""
            if jobStatus == JobStatus.STALLED:
                # If job is stalled, find the previous status from the logging info
                res = monitorServer.getJobLoggingInfo(jobID)
                if not res["OK"]:
                    self.log.error(
                        "finalizeRequest: Failed to get job logging info",
                        "JobID: %d" % jobID)
                    return res
                # Check the last status was Stalled and get the one before
                loggingInfo = res["Value"]
                if len(loggingInfo) >= 2 and loggingInfo[-1][0] == JobStatus.STALLED:
                    jobStatus, jobMinorStatus, jobAppStatus = loggingInfo[-2][:3]
                    newJobStatus = jobStatus

            # update the job pending request digest in any case since it is modified
            self.log.info(
                "finalizeRequest: Updating request digest for job %d" % jobID)

            digest = self.getDigest(requestID)
            if digest["OK"]:
                digest = digest["Value"]
                self.log.verbose(digest)
                res = stateServer.setJobParameter(jobID, "PendingRequest",
                                                  digest)
                if not res["OK"]:
                    self.log.info(
                        "finalizeRequest: Failed to set job %d parameter: %s" %
                        (jobID, res["Message"]))
                    return res
            else:
                # A missing digest is only logged; the status update still goes ahead.
                self.log.error(
                    "finalizeRequest: Failed to get request digest for %s: %s"
                    % (requestID, digest["Message"]))
            if jobStatus == JobStatus.COMPLETED:
                # What to do? Depends on what we have in the minorStatus
                if jobMinorStatus == JobMinorStatus.PENDING_REQUESTS:
                    newJobStatus = JobStatus.DONE
                elif jobMinorStatus == JobMinorStatus.APP_ERRORS:
                    newJobStatus = JobStatus.FAILED
                elif jobMinorStatus == JobMinorStatus.MARKED_FOR_TERMINATION:
                    # If the job has been Killed, set it Killed
                    newJobStatus = JobStatus.KILLED
                else:
                    self.log.error(
                        "finalizeRequest: Unexpected jobMinorStatus",
                        "for %d (got %s)" % (jobID, jobMinorStatus))
                    return S_ERROR("Unexpected jobMinorStatus")

            if newJobStatus:
                self.log.info(
                    "finalizeRequest: Updating job status",
                    "for %d to '%s/%s'" %
                    (jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE),
                )
            else:
                self.log.info(
                    "finalizeRequest: Updating job minor status",
                    "for %d to '%s' (current status is %s)" %
                    (jobID, JobMinorStatus.REQUESTS_DONE, jobStatus),
                )
            # newJobStatus may be empty here: the call is made either way,
            # always carrying the REQUESTS_DONE minor status.
            stateUpdate = stateServer.setJobStatus(
                jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE, "RMS")
            if jobAppStatus and stateUpdate["OK"]:
                # Restore the application status the job had before it stalled
                stateUpdate = stateServer.setJobApplicationStatus(
                    jobID, jobAppStatus, "RMS")
            if not stateUpdate["OK"]:
                self.log.error(
                    "finalizeRequest: Failed to set job status",
                    "JobID: %d, error: %s" % (jobID, stateUpdate["Message"]),
                )
                return stateUpdate

        return S_OK(newJobStatus)
Exemplo n.º 27
0
    def test_FullChain(self):
        """This test will

        - call all the WMSClient methods
          that will end up calling all the JobManager service methods
        - use the JobMonitoring to verify few properties
        - call the JobCleaningAgent to eliminate job entries from the DBs
        """
        wmsClient = WMSClient()
        jobMonitor = JobMonitoringClient()
        jobStateUpdate = JobStateUpdateClient()

        # create the job
        job = helloWorldJob()
        jobDescription = createFile(job)

        # submit the job
        res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertTrue(isinstance(res["Value"], int),
                        msg="Got %s" % type(res["Value"]))
        self.assertEqual(res["Value"],
                         res["JobID"],
                         msg="Got %s, expected %s" %
                         (str(res["Value"]), res["JobID"]))
        # "Value" and "JobID" were just asserted equal, so one assignment is enough
        jobID = res["Value"]

        # updating the status
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING,
                                          "Executing Minchiapp", "source")
        self.assertTrue(res["OK"], res.get("Message"))

        # reset the job
        res = wmsClient.resetJob(jobID)
        self.assertTrue(res["OK"], res.get("Message"))

        # reschedule the job
        res = wmsClient.rescheduleJob(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobsStatus(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"][jobID]["Status"],
                         JobStatus.RECEIVED,
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobsMinorStatus([jobID])
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"],
                         {jobID: {
                             "MinorStatus": "Job Rescheduled"
                         }},
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobsApplicationStatus([jobID])
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"],
                         {jobID: {
                             "ApplicationStatus": "Unknown"
                         }},
                         msg="Got %s" % str(res["Value"]))
        res = jobMonitor.getJobsStates([jobID])
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(
            res["Value"],
            {
                jobID: {
                    "Status": JobStatus.RECEIVED,
                    "MinorStatus": "Job Rescheduled",
                    "ApplicationStatus": "Unknown"
                }
            },
            msg="Got %s" % str(res["Value"]),
        )

        # updating the status again: Checking -> Waiting -> Matched
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.CHECKING,
                                          "checking", "source")
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.WAITING, "waiting",
                                          "source")
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.MATCHED, "matched",
                                          "source")
        self.assertTrue(res["OK"], res.get("Message"))

        # kill the job
        res = wmsClient.killJob(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobsStatus(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"][jobID]["Status"],
                         JobStatus.KILLED,
                         msg="Got %s" % str(res["Value"]))

        # delete the job - this will just set its status to "deleted"
        res = wmsClient.deleteJob(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        res = jobMonitor.getJobsStatus(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"][jobID]["Status"],
                         JobStatus.DELETED,
                         msg="Got %s" % str(res["Value"]))
Exemplo n.º 28
0
class TransformationInfo(object):
    """Hold information about a transformation and act on its tasks, jobs and files.

    When ``enabled`` is False the instance works in dry-run mode: the
    job/task/file status setters and :meth:`cleanOutputs` do not modify
    anything (``cleanOutputs`` only logs what it would have removed).
    The read-only queries (:meth:`checkTasksStatus`, :meth:`getJobs`) always run.
    """

    def __init__(self, transformationID, transInfoDict, enabled, tClient,
                 fcClient, jobMon):
        """Store clients etc.

        :param transformationID: ID of the transformation
        :param dict transInfoDict: transformation description; must provide the keys
                                   'TransformationName', 'Type', 'AuthorDN' and 'AuthorGroup'
        :param bool enabled: if False, nothing is modified (dry run)
        :param tClient: transformation client (used for getTransformationFiles,
                        setFileStatusForTransformation and setTaskStatus)
        :param fcClient: file catalog client (used for getFileDescendents)
        :param jobMon: job monitoring client (used for getJobs)
        """
        self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
        self.enabled = enabled
        self.tID = transformationID
        self.transName = transInfoDict['TransformationName']
        self.tClient = tClient
        self.jobMon = jobMon
        self.fcClient = fcClient
        self.transType = transInfoDict['Type']
        self.authorDN = transInfoDict['AuthorDN']
        self.authorGroup = transInfoDict['AuthorGroup']
        self.jobStateClient = JobStateUpdateClient()

    def checkTasksStatus(self):
        """Return the files of this transformation grouped by task.

        :returns: defaultdict(list) keyed by TaskID; each value is a list of dicts
                  with the keys FileID, LFN, Status and ErrorCount
        :raises: RuntimeError when the transformation files cannot be retrieved
        """
        res = self.tClient.getTransformationFiles(
            condDict={'TransformationID': self.tID})
        if not res['OK']:
            raise RuntimeError("Failed to get transformation tasks: %s" %
                               res['Message'])

        tasksDict = defaultdict(list)
        for task in res['Value']:
            tasksDict[task['TaskID']].append(
                dict(FileID=task['FileID'],
                     LFN=task['LFN'],
                     Status=task['Status'],
                     ErrorCount=task['ErrorCount']))

        return tasksDict

    def setJobDone(self, job):
        """Set the task of the job to Done, and the job itself if it is not Done yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Done')
        if job.status != 'Done':
            self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

    def setJobFailed(self, job):
        """Set the task of the job to Failed, and the job itself if it is not Failed yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Failed')
        if job.status != 'Failed':
            self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

    def setInputUnused(self, job):
        """Set the input files of the job to Unused."""
        self.__setInputStatus(job, 'Unused')

    def setInputMaxReset(self, job):
        """Set the input files of the job to MaxReset."""
        self.__setInputStatus(job, "MaxReset")

    def setInputProcessed(self, job):
        """Set the input files of the job to Processed."""
        self.__setInputStatus(job, "Processed")

    def setInputDeleted(self, job):
        """Set the input files of the job to Deleted."""
        self.__setInputStatus(job, "Deleted")

    def __setInputStatus(self, job, status):
        """Set the input files of the job to the given status (no-op when disabled).

        :param job: object exposing the ``inputFiles`` attribute
        :param str status: new file status
        :raises: RuntimeError when the status update fails
        """
        if not self.enabled:
            return
        result = self.tClient.setFileStatusForTransformation(
            self.tID, status, job.inputFiles, force=True)
        if not result['OK']:
            # Use the per-transformation sub-logger, like every other method here.
            self.log.error("Failed updating status", result['Message'])
            raise RuntimeError("Failed updating file status")

    def __setTaskStatus(self, job, status):
        """Update the status of the job's task via the transformation client.

        Does not check ``self.enabled`` itself; the public setters do that.

        :raises: RuntimeError when the update fails
        """
        res = self.tClient.setTaskStatus(self.transName, job.taskID, status)
        if not res['OK']:
            raise RuntimeError("Failed updating task status: %s" %
                               res['Message'])

    def __updateJobStatus(self, jobID, status, minorstatus=''):
        """Update the job status.

        :returns: S_OK('DisabledMode') when disabled, otherwise the result of setJobStatus
        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return S_OK('DisabledMode')
        result = self.jobStateClient.setJobStatus(jobID, status, minorstatus,
                                                  'DataRecoveryAgent')
        if not result['OK']:
            self.log.error('Failed to update job status', result['Message'])
            raise RuntimeError('Failed to update job status')
        return result

    def __findAllDescendants(self, lfnList):
        """Find all descendants of a list of LFNs.

        Descendants of depth 1 to 7 are requested. A failing catalog query is
        treated as "no descendants" (best effort), but is logged.

        :param list lfnList: LFNs to look up
        :returns: flat list of descendant LFNs of the successfully resolved files
        """
        allDescendants = []
        # Pass a real list: on Python 3 a bare range() is a lazy object, not a list.
        result = self.fcClient.getFileDescendents(lfnList, list(range(1, 8)))
        if not result['OK']:
            self.log.warn('Failed to find descendants',
                          result.get('Message', ''))
            return allDescendants
        for descendants in result['Value']['Successful'].values():
            allDescendants.extend(descendants)
        return allDescendants

    def cleanOutputs(self, jobInfo):
        """Remove all job outputs for job represented by jobInfo object.

        Including removal of descendents, if defined. Only output files whose
        status is "Exists" are removed, plus all descendants that were found.
        When disabled, the files are only listed in the log.

        :param jobInfo: object exposing ``outputFiles`` and ``outputFileStatus``
        :raises: RuntimeError when no proxy can be obtained or the removal call fails
        """
        if not jobInfo.outputFiles:
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        existingOutputFiles = [
            lfn for lfn, status in izip_longest(jobInfo.outputFiles,
                                                jobInfo.outputFileStatus)
            if status == "Exists"
        ]
        filesToDelete = existingOutputFiles + descendants

        if not filesToDelete:
            return

        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " %
                            "\n +++ ".join(filesToDelete))
            return
        self.log.notice("Remove these files: \n +++ %s " %
                        "\n +++ ".join(filesToDelete))

        # Failed LFNs are grouped by error reason so each reason is reported once.
        errorReasons = defaultdict(list)
        successfullyRemoved = 0

        for lfnList in breakListIntoChunks(filesToDelete, 200):
            # The removal is executed with the proxy of the transformation author.
            with UserProxy(proxyUserDN=self.authorDN,
                           proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' %
                                       proxyResult['Message'])
                result = DataManager().removeFile(lfnList)
                if not result['OK']:
                    self.log.error("Failed to remove LFNs", result['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" %
                                       result['Message'])
                for lfn, err in result['Value']['Failed'].items():
                    errorReasons[str(err)].append(lfn)
                successfullyRemoved += len(result['Value']['Successful'])
        for reason, lfns in errorReasons.items():
            self.log.error("Failed to remove %d files with error: %s" %
                           (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % successfullyRemoved)

    def getJobs(self, statusList=None):
        """Get done and failed jobs.

        :param list statusList: optional list of status to find jobs;
                                defaults to ['Done', 'Failed']
        :returns: 3-tuple of OrderedDict of JobInfo objects, keyed by jobID;
                  number of Done jobs; number of Failed jobs
        :raises: RuntimeError when the job query fails
        """
        if statusList is None:
            statusList = ['Done', 'Failed']

        # Statuses that were not requested keep an empty result.
        done = S_OK([])
        failed = S_OK([])
        if 'Done' in statusList:
            self.log.notice("Getting 'Done' Jobs...")
            done = self.__getJobs(["Done"])
        if 'Failed' in statusList:
            self.log.notice("Getting 'Failed' Jobs...")
            failed = self.__getJobs(["Failed"])
        done = done['Value']
        failed = failed['Value']

        jobsUnsorted = {}
        for job in done:
            jobsUnsorted[int(job)] = JobInfo(job, "Done", self.tID,
                                             self.transType)
        for job in failed:
            jobsUnsorted[int(job)] = JobInfo(job, "Failed", self.tID,
                                             self.transType)
        # Order by integer job ID.
        jobs = OrderedDict(sorted(jobsUnsorted.items(), key=lambda t: t[0]))

        self.log.notice("Found %d Done Jobs " % len(done))
        self.log.notice("Found %d Failed Jobs " % len(failed))
        return jobs, len(done), len(failed)

    def __getJobs(self, status):
        """Return list of jobs with given status.

        Jobs are selected by status and by the JobGroup built from the
        zero-padded (8 digits) transformation ID.

        :param list status: list of status to find
        :returns: S_OK with result
        :raises: RuntimeError when failing to find jobs
        """
        attrDict = dict(Status=status, JobGroup='%08d' % int(self.tID))
        res = self.jobMon.getJobs(attrDict)
        if not res['OK']:
            self.log.error('Error finding jobs: ', res['Message'])
            raise RuntimeError('Failed to get jobs')
        self.log.debug('Found Trans jobs: %s' % res['Value'])
        return res
Exemplo n.º 29
0
__RCSID__ = "$Id$"

# The command line is parsed before the remaining DIRAC imports below.
# NOTE(review): presumably this initialises the DIRAC configuration that the
# clients rely on, which is why the statement order matters — confirm.
from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient

# sut (presumably "system under test": the two clients exercised by this module)
from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient
from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateUpdateClient

from DIRAC.tests.Integration.WorkloadManagementSystem.Test_Client_WMS import helloWorldJob, createFile

# Module-level client instances, created once when this module is loaded.
jobMonitoringClient = JobMonitoringClient()
jobStateUpdateClient = JobStateUpdateClient()


def createJob():
    """Submit a hello-world job through the WMS client and return its job ID.

    The job description file is produced with ``createFile`` and handed to the
    job's JDL conversion. An AssertionError carrying the result message is
    raised when the submission result is not OK.

    :returns: the ID of the submitted job, as an int
    """
    helloJob = helloWorldJob()
    descriptionFile = createFile(helloJob)

    submission = WMSClient().submitJob(
        helloJob._toJDL(xmlFile=descriptionFile))
    assert submission['OK'], submission['Message']
    return int(submission['Value'])


def updateFlag():