def testAKilledJobMonitoring(self):
    """
    _TestAKilledJobMonitoring_

    Simulate a job that is killed and verify that the data reported to
    the dashboard at each transition (job start, step start, step end,
    job killed) is correct.
    """
    # Build the fixtures: a job, its processing task and a failing report.
    testJob = self.createTestJob()
    processingTask = self.createWorkload().getTask(taskName="DataProcessing")
    failedReport = self.createReport(outcome=1)
    self.setupJobEnvironment(name='testC')

    # Dashboard reporter pointed at a local test destination.
    dashboardInfo = DashboardInfo(job=testJob, task=processingTask,
                                  dashboardUrl='127.0.0.1:8884')

    # The job-start message must flag the job as running at FNAL.
    startData = dashboardInfo.jobStart()
    for key, expected in [('MessageType', 'JobStatus'),
                          ('StatusValue', 'running'),
                          ('StatusDestination', "T1_US_FNAL"),
                          ('taskId', 'wmagent_Tier1ReReco')]:
        self.assertEqual(startData[key], expected)

    # First (and only) step exercised by this test.
    cmsRunStep = processingTask.getStep(stepName="cmsRun1")

    # Step start: both the nested jobStart record and the numbered
    # ExeStart field must carry the step name.
    stepStartData = dashboardInfo.stepStart(step=cmsRunStep.data)
    self.assertNotEqual(stepStartData['jobStart'], None)
    self.assertEqual(stepStartData['jobStart']['ExeStart'], cmsRunStep.name())
    self.assertEqual(stepStartData['jobStart']['WNHostName'], socket.gethostname())
    self.assertEqual(stepStartData['1_ExeStart'], cmsRunStep.name())

    # Step end: the failing report must surface as a non-zero exit code.
    stepEndData = dashboardInfo.stepEnd(step=cmsRunStep.data,
                                        stepReport=failedReport)
    self.assertEqual(stepEndData['1_ExeEnd'], cmsRunStep.name())
    self.assertNotEqual(stepEndData['1_ExeExitCode'], 0)
    self.assertTrue(stepEndData['1_ExeWCTime'] >= 0)

    # Kill the job and check the termination record.
    killData = dashboardInfo.jobKilled()
    self.assertEqual(killData['ExeEnd'], "cmsRun1")
    self.assertNotEqual(killData['JobExitCode'], 0)
    self.assertEqual(killData['WrapperCPUTime'], 0)
    self.assertTrue(killData['WrapperWCTime'] >= 0)
    self.assertNotEqual(killData['JobExitReason'].find('killed'), -1)
    return
def testAKilledJobMonitoring(self):
    """
    _TestAKilledJobMonitoring_

    Simulate a job that is killed and check that the data sent to the
    dashboard destination is correct at every stage of the job.
    """
    # Fixtures: a job, its task and a report whose outcome marks a failure.
    job = self.createTestJob()
    task = self.createWorkload().getTask(taskName="DataProcessing")
    report = self.createReport(outcome=1)
    self.setupJobEnvironment(name='testC')

    # Reporter wired to a local test destination via addDestination.
    dbInfo = DashboardInfo(job=job, task=task)
    dbInfo.addDestination('127.0.0.1', 8884)

    # Job start must announce a running job bound for FNAL.
    payload = dbInfo.jobStart()
    self.assertEqual(payload['MessageType'], 'JobStatus')
    self.assertEqual(payload['StatusValue'], 'running')
    self.assertEqual(payload['StatusDestination'], "T1_US_FNAL")
    self.assertEqual(payload['taskId'], 'wmagent_Tier1ReReco')

    # Exercise the single cmsRun step.
    step = task.getStep(stepName="cmsRun1")

    # Step start: ExeStart fields must carry the step name and this host.
    payload = dbInfo.stepStart(step=step.data)
    self.assertNotEqual(payload['jobStart'], None)
    self.assertEqual(payload['jobStart']['ExeStart'], step.name())
    self.assertEqual(payload['jobStart']['WNHostName'], socket.gethostname())
    self.assertEqual(payload['1_ExeStart'], step.name())

    # Step end: the failed report must produce a non-zero exit code.
    payload = dbInfo.stepEnd(step=step.data, stepReport=report)
    self.assertEqual(payload['1_ExeEnd'], step.name())
    self.assertNotEqual(payload['1_ExeExitCode'], 0)
    self.assertTrue(payload['1_ExeWCTime'] >= 0)

    # Finally kill the job and validate the termination record.
    payload = dbInfo.jobKilled()
    self.assertEqual(payload['ExeEnd'], "cmsRun1")
    self.assertNotEqual(payload['JobExitCode'], 0)
    self.assertEqual(payload['WrapperCPUTime'], 0)
    self.assertTrue(payload['WrapperWCTime'] >= 0)
    self.assertNotEqual(payload['JobExitReason'].find('killed'), -1)
    return
class DashboardMonitor(WMRuntimeMonitor):
    """
    _DashboardMonitor_

    Run in the background and pass information to
    the DashboardInterface instance.

    If the job exceeds timeouts, kill the job.

    Every dashboard call is wrapped in a broad try/except: monitoring is
    best-effort and must never take down the job it is watching.
    """

    def __init__(self):
        # Timing/step bookkeeping; populated by stepStart/stepEnd.
        self.startTime = None
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        # Filled in by initMonitor.
        self.task = None
        self.job = None
        self.dashboardInfo = None
        WMRuntimeMonitor.__init__(self)

    def initMonitor(self, task, job, logPath, args=None):
        """
        Handles the monitor initiation.

        :param task: the WMTask being monitored
        :param job: the job being monitored
        :param logPath: path to the framework job report (unused here)
        :param args: optional dict with 'destinationHost',
                     'destinationPort' and 'cores' entries
        """
        # NOTE: default changed from a shared mutable {} to None to avoid
        # the mutable-default-argument pitfall; behavior is unchanged.
        args = args or {}
        logging.info("In DashboardMonitor.initMonitor")
        self.task = task
        self.job = job
        destHost = args.get('destinationHost', None)
        destPort = args.get('destinationPort', None)
        dashboardUrl = '%s:%s' % (destHost, str(destPort))
        cores = args.get('cores', 0)
        self.dashboardInfo = DashboardInfo(task, job,
                                           dashboardUrl=dashboardUrl,
                                           overrideCores=cores)

    def jobStart(self, task):
        """
        Job start notifier.
        """
        try:
            self.dashboardInfo.jobStart()
        except Exception as ex:
            # Best-effort: log and carry on, never kill the job.
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def jobEnd(self, task):
        """
        Job End notification
        """
        try:
            self.dashboardInfo.jobEnd()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepStart(self, step):
        """
        Step start notification
        """
        self.currentStep = step
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None
        self.startTime = time.time()
        try:
            self.dashboardInfo.stepStart(step=step)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepEnd(self, step, stepReport):
        """
        Step end notification
        """
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        try:
            self.dashboardInfo.stepEnd(step=step, stepReport=stepReport)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepKilled(self, step):
        """
        Step killed notification
        """
        self.currentStep = None
        self.currentStepName = None
        try:
            self.dashboardInfo.stepKilled(step=step)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def jobKilled(self, task):
        """
        Killed job notification
        """
        try:
            self.dashboardInfo.jobKilled()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def periodicUpdate(self):
        """
        Run on the defined intervals. Tell the dashboard info
        to run the periodic update
        """
        try:
            self.dashboardInfo.periodicUpdate()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return
class DashboardMonitor(WMRuntimeMonitor):
    """
    _DashboardMonitor_

    Run in the background and pass information to
    the DashboardInterface instance.

    If the job exceeds timeouts, kill the job: first a soft kill
    (SIGUSR2), then, after the hard timeout, SIGTERM escalating to
    SIGKILL.
    """

    def __init__(self):
        # Timing/step bookkeeping; populated by stepStart/stepEnd.
        self.startTime = None
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        # Timeout configuration and kill state, set in initMonitor.
        self.softTimeOut = None
        self.hardTimeOut = None
        self.killFlag = False
        self.cmsswFile = None
        self.task = None
        self.job = None
        self.dashboardInfo = None
        WMRuntimeMonitor.__init__(self)

    def initMonitor(self, task, job, logPath, args=None):
        """
        Handles the monitor initiation.

        :param task: the WMTask being monitored
        :param job: the job being monitored
        :param logPath: path to the framework job report, used to record
                        a JobTimeout error if the job must be killed
        :param args: optional dict with 'softTimeOut', 'hardTimeOut',
                     'destinationHost' and 'destinationPort' entries
        """
        # NOTE: default changed from a shared mutable {} to None to avoid
        # the mutable-default-argument pitfall; behavior is unchanged.
        args = args or {}
        logging.info("In DashboardMonitor.initMonitor")
        self.task = task
        self.job = job
        self.logPath = logPath
        self.softTimeOut = args.get('softTimeOut', None)
        self.hardTimeOut = args.get('hardTimeOut', None)
        destHost = args.get('destinationHost', None)
        destPort = args.get('destinationPort', None)
        self.dashboardInfo = DashboardInfo(task=task, job=job)
        if destHost and destPort:
            logging.info("About to set destination to %s:%s" % (destHost, destPort))
            self.dashboardInfo.addDestination(host=destHost, port=destPort)

    def jobStart(self, task):
        """
        Job start notifier.
        """
        self.dashboardInfo.jobStart()
        return

    def jobEnd(self, task):
        """
        Job End notification
        """
        self.dashboardInfo.jobEnd()
        return

    def stepStart(self, step):
        """
        Step start notification
        """
        self.currentStep = step
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None
        self.startTime = time.time()
        self.dashboardInfo.stepStart(step=step)
        return

    def stepEnd(self, step, stepReport):
        """
        Step end notification
        """
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        self.dashboardInfo.stepEnd(step=step, stepReport=stepReport)
        return

    def stepKilled(self, step):
        """
        Step killed notification
        """
        self.currentStep = None
        self.currentStepName = None
        self.dashboardInfo.stepKilled(step=step)

    def jobKilled(self, task):
        """
        Killed job notification
        """
        self.dashboardInfo.jobKilled()
        return

    def periodicUpdate(self):
        """
        Run on the defined intervals. Forward the periodic update to the
        dashboard and enforce the soft/hard kill timeouts on the current
        step's process.
        """
        if not self.currentStep:
            # We're probably between steps
            return

        self.dashboardInfo.periodicUpdate()

        # Check for events
        if self.cmsswFile:
            # BUGFIX: was searchForEvent(file) — passing the builtin name
            # 'file' instead of the monitored CMSSW file.
            run, event = searchForEvent(self.cmsswFile)
            if run and event:
                # Then we actually found something, otherwise do nothing
                # Right now I don't know what to do
                pass

        # Do timeout
        if not self.softTimeOut:
            return

        if time.time() - self.startTime > self.softTimeOut:
            # Then we have to kill the process

            # If our stepName is None, we're inbetween steps. Nothing to kill!
            if self.currentStepName is None:
                return

            # If our stepName is valid, then we may need the stepSpace
            if self.currentStepSpace is None:
                self.currentStepSpace = getStepSpace(self.currentStepName)

            # First, get the PID
            stepPID = getStepPID(self.currentStepSpace, self.currentStepName)

            # Now kill it!
            msg = ""
            msg += "Start Time: %s\n" % self.startTime
            msg += "Time Now: %s\n" % time.time()
            msg += "Timeout: %s\n" % self.softTimeOut
            msg += "Killing Job...\n"
            msg += "Process ID is: %s\n" % stepPID

            # If possible, write a FWJR
            report = Report.Report()
            try:
                self.logPath = os.path.join(self.currentStepSpace.location,
                                            '../../../',
                                            os.path.basename(self.logPath))
                if os.path.isfile(self.logPath):
                    # We should be able to find existant job report.
                    # If not, we're in trouble
                    logging.debug("Found pre-existant error report in DashboardMonitor termination.")
                    report.load(self.logPath)
                report.addError(stepName=self.currentStepName, exitCode=99901,
                                errorType="JobTimeout", errorDetails=msg)
                report.save(self.logPath)
            except Exception as ex:
                # BUGFIX: was Python 2 'except Exception, ex:' syntax.
                # Basically, we can't write a log report and we're hosed
                # Kill anyway, and hope the logging file gets written out
                msg2 = "Exception while writing out jobReport.\n"
                msg2 += "Aborting job anyway: unlikely you'll get any error report.\n"
                msg2 += str(ex)
                msg2 += str(traceback.format_exc()) + '\n'
                logging.error(msg2)

            if stepPID is None or stepPID == os.getpid():
                # Then we are supposed to kill things
                # that don't exist in separate processes:
                # Self-terminate
                msg += "WARNING: No separate process. Watchdog will attempt self-termination."
                logging.error(msg)
                os.abort()

            if time.time() - self.startTime < self.hardTimeOut or not self.killFlag:
                # Soft kill: ask the step to shut itself down.
                msg += "WARNING: Soft Kill Timeout has Expired:"
                logging.error(msg)
                os.kill(stepPID, signal.SIGUSR2)
                self.killFlag = True
            elif self.killFlag:
                # Hard kill: SIGTERM, and SIGKILL if the process survives.
                msg += "WARNING: Hard Kill Timeout has Expired:"
                logging.error(msg)
                os.kill(stepPID, signal.SIGTERM)
                killedpid, stat = os.waitpid(stepPID, os.WNOHANG)
                if killedpid == 0:
                    os.kill(stepPID, signal.SIGKILL)
                    killedpid, stat = os.waitpid(stepPID, os.WNOHANG)
                    if killedpid == 0:
                        logging.error("Can't kill job. Out of options. Waiting for system reboot.")
                        # Panic! It's unkillable!

        return
class DashboardMonitor(WMRuntimeMonitor):
    """
    _DashboardMonitor_

    Run in the background and pass information to
    the DashboardInterface instance.

    If the job exceeds timeouts, kill the job.

    All dashboard calls are wrapped in a broad try/except so that a
    monitoring failure can never abort the job being monitored.
    """

    def __init__(self):
        # Timing/step bookkeeping; populated by stepStart/stepEnd.
        self.startTime = None
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        # Filled in by initMonitor.
        self.task = None
        self.job = None
        self.dashboardInfo = None
        WMRuntimeMonitor.__init__(self)

    def initMonitor(self, task, job, logPath, args=None):
        """
        Handles the monitor initiation.

        :param task: the WMTask being monitored
        :param job: the job being monitored
        :param logPath: path to the framework job report (unused here)
        :param args: optional dict with 'destinationHost',
                     'destinationPort' and 'cores' entries
        """
        # NOTE: default changed from a shared mutable {} to None to avoid
        # the mutable-default-argument pitfall; behavior is unchanged.
        args = args or {}
        logging.info("In DashboardMonitor.initMonitor")
        self.task = task
        self.job = job
        destHost = args.get('destinationHost', None)
        destPort = args.get('destinationPort', None)
        dashboardUrl = '%s:%s' % (destHost, str(destPort))
        cores = args.get('cores', 0)
        self.dashboardInfo = DashboardInfo(task, job,
                                           dashboardUrl=dashboardUrl,
                                           overrideCores=cores)

    def jobStart(self, task):
        """
        Job start notifier.
        """
        try:
            self.dashboardInfo.jobStart()
        except Exception as ex:
            # Best-effort: log and carry on, never kill the job.
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def jobEnd(self, task):
        """
        Job End notification
        """
        try:
            self.dashboardInfo.jobEnd()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepStart(self, step):
        """
        Step start notification
        """
        self.currentStep = step
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None
        self.startTime = time.time()
        try:
            self.dashboardInfo.stepStart(step=step)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepEnd(self, step, stepReport):
        """
        Step end notification
        """
        self.currentStep = None
        self.currentStepName = None
        self.currentStepSpace = None
        try:
            self.dashboardInfo.stepEnd(step=step, stepReport=stepReport)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def stepKilled(self, step):
        """
        Step killed notification
        """
        self.currentStep = None
        self.currentStepName = None
        try:
            self.dashboardInfo.stepKilled(step=step)
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def jobKilled(self, task):
        """
        Killed job notification
        """
        try:
            self.dashboardInfo.jobKilled()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return

    def periodicUpdate(self):
        """
        Run on the defined intervals. Tell the dashboard info
        to run the periodic update
        """
        try:
            self.dashboardInfo.periodicUpdate()
        except Exception as ex:
            logging.error(str(ex))
            logging.error(str(traceback.format_exc()))
        return