Example No. 1
    def blockResult(self, modelRun, jobMI):
        # Check jobMI is of type MPI ...
        maxRunTime = modelRun.jobParams['maxRunTime']
        pollInterval = modelRun.jobParams['pollInterval']
        procHandle = jobMI.procHandle

        # Navigate to the model's base directory
        startDir = os.getcwd()
        if modelRun.basePath != startDir:
            print "Changing to ModelRun's specified base path '%s'" % \
                (modelRun.basePath)
            os.chdir(modelRun.basePath)

        if maxRunTime is None or maxRunTime <= 0:
            timeOut = False
            retCode = procHandle.wait()
        else:
            if pollInterval > maxRunTime: pollInterval = maxRunTime
            totalTime = 0
            timeOut = True
            while totalTime <= maxRunTime:
                # Note: current strategy in this loop means 'totalTime'
                #  recorded here will only be as accurate as size of
                #  pollInterval.
                #  Thus this is a fall-back for recording time taken.
                time.sleep(pollInterval)
                totalTime += pollInterval
                retCode = procHandle.poll()
                if retCode is not None:
                    timeOut = False
                    break
            if timeOut:
                # At this point, we know the process has run too long.
                # From Python 2.6, change this to procHandle.kill()
                print "Error: passed timeout of %s, sending quit signal." % \
                    (str(timedelta(seconds=maxRunTime)))
                os.kill(procHandle.pid, signal.SIGQUIT)
        # TODO: set finishTime

        # Check status of run (eg error status)
        stdOutFilename = modelRun.getStdOutFilename()
        stdErrFilename = modelRun.getStdErrFilename()
        if timeOut:
            raise ModelRunTimeoutError(modelRun.name, stdOutFilename,
                                       stdErrFilename, maxRunTime)
        if retCode != 0:
            raise ModelRunRegularError(modelRun.name, retCode, stdOutFilename,
                                       stdErrFilename)
        else:
            # Taking advantage of os.path.join functionality to automatically
            #  over-ride later absolute paths.
            absOutPath = os.path.join(modelRun.basePath, modelRun.outputPath)
            absLogPath = os.path.join(modelRun.basePath, modelRun.logPath)
            print "Model ran successfully (output saved to path %s" %\
                (absOutPath),
            if absLogPath != absOutPath:
                print ", std out & std error to %s" % (absLogPath),
            print ")."

        # Now tidy things up after the run.
        jobMI.stdOutFile.close()
        jobMI.stdErrFile.close()
        print "Doing post-run tidyup:"
        modelRun.postRunCleanup()

        # Construct a modelResult
        mResult = ModelResult(modelRun.name, absOutPath)
        mResult.jobMetaInfo = jobMI
        try:
            #TODO: the below should be a standard method of ModelResult
            tSteps, simTime = getSimInfoFromFreqOutput(mResult.outputPath)
        except ValueError:
            # For now, allow runs that didn't create a freq output
            tSteps, simTime = None, None
        #Now collect profiler performance info.
        for profiler in self.profilers:
            profiler.attachPerformanceInfo(jobMI, mResult)

        if modelRun.basePath != startDir:
            print "Restoring initial path '%s'" % \
                (startDir)
            os.chdir(startDir)
        return mResult
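
The loop above waits on the spawned MPI process by polling at a fixed interval and falls back to a quit signal when the run exceeds maxRunTime. Below is a minimal standalone sketch of that poll-with-timeout pattern, assuming procHandle is a subprocess.Popen object (the example does not show how it is created); the helper name wait_with_timeout and the usage command line are hypothetical.

import subprocess
import time

def wait_with_timeout(proc, max_run_time, poll_interval):
    # Poll 'proc' until it exits or max_run_time seconds have elapsed.
    # Returns (ret_code, timed_out); ret_code is None if the run timed out.
    # As in the loop above, the recorded time is only as accurate as
    # poll_interval.
    if max_run_time is None or max_run_time <= 0:
        return proc.wait(), False
    poll_interval = min(poll_interval, max_run_time)
    total_time = 0
    while total_time <= max_run_time:
        time.sleep(poll_interval)
        total_time += poll_interval
        ret_code = proc.poll()
        if ret_code is not None:
            return ret_code, False
    # Timed out: Popen.terminate() (available from Python 2.6) can replace
    # the os.kill(pid, SIGQUIT) call used above.
    proc.terminate()
    return None, True

# Hypothetical usage:
#   proc = subprocess.Popen(["mpirun", "-np", "4", "./model"])
#   ret_code, timed_out = wait_with_timeout(proc, max_run_time=3600, poll_interval=10)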
Example No. 3
    def blockResult(self, modelRun, jobMetaInfo):
        # Check jobMetaInfo is of type PBS
        # via self.runType = "PBS"
        startDir = os.getcwd()
        if modelRun.basePath != startDir:
            print "Changing to ModelRun's specified base path '%s'" % \
                (modelRun.basePath)
            os.chdir(modelRun.basePath)
        jobID = jobMetaInfo.jobId
        pollInterval = modelRun.jobParams['pollInterval']
        checkOutput = 0
        # NB: unlike with the MPI Job Runner, we don't check "maxJobTime" here,
        #  since that was already encoded in the PBS walltime used. Wait as long
        #  as necessary for the job to be queued, run, and completed in the PBS
        #  system.
        pbsWaitTime = 0
        gotResult = False
        pbsError = False
        while not gotResult:
            time.sleep(pollInterval)
            pbsWaitTime += pollInterval
            # check PBS job output ... (eg using qstat on jobID)
            qstat = os.popen("qstat " + jobID).readlines()
            qstatus = "%s" % (qstat)
            # when the job has been submitted and we query the job ID we should get something like:
            # if the job has ended:
            #qstat: Unknown Job Id 3506.tweedle
            # OR
            #3505.tweedle              cratonic30t2c3d2 WendySharples   00:15:16 E batch
            # if the job has not commenced running or is still running:
            #3505.tweedle              cratonic30t2c3d2 WendySharples   00:15:16 Q batch
            #3505.tweedle              cratonic30t2c3d2 WendySharples   00:15:16 R batch
            #3505.tweedle              cratonic30t2c3d2 WendySharples   00:15:16 S batch
            # if the job has not been able to be run:
            #3505.tweedle              cratonic30t2c3d2 WendySharples   00:15:16 C batch
            # So if we break the command line up into an array of words separated by spaces:
            qstatus = qstatus.split(" ")
            #jobName and modelName MUST be THE SAME
            for ii in range(len(qstatus)):
                if qstatus[ii] == "Unknown":
                    print "job has already run\n"
                    gotResult = True
                elif qstatus[ii] == "R":
                    print "job is still running\n"
                elif qstatus[ii] == "Q":
                    print "job is queued\n"
                elif qstatus[ii] == "C":
                    print "job is cancelled\n"
                    gotResult = True
                    pbsError = True
                elif qstatus[ii] == "E":
                    print "job has ended\n"
                    gotResult = True

        # Check status of run (eg error status)
        # TODO: archive PBS file in modelRun output directory.
        # TODO: connect/copy PBS stdout/error files to standard expected names.
        stdOutFilename = modelRun.getStdOutFilename()
        stdErrFilename = modelRun.getStdErrFilename()

        #qdel = os.popen("qdel "+jobID).readlines()
        if pbsError:
            raise ModelRunRegularError(modelRun.name, -1, stdOutFilename,
                                       stdErrFilename)
        else:
            absOutPath = os.path.join(modelRun.basePath, modelRun.outputPath)
            absLogPath = os.path.join(modelRun.basePath, modelRun.logPath)
            # TODO: Move and rename output and error files created by PBS,
            #  ... to stdOutFilename, stdErrFilename
            # check PBS output file and make sure there's something in it
            jobName = "%s" % (modelRun.name)
            jobid = jobID.split(".")
            jobNo = jobid[0]
            fileName = jobName + ".o" + jobNo
            f = open(fileName, 'r')
            lines = f.read()
            f.close()
            if lines == "":
                print "Error: no output obtained in PBS output file\n"
                raise ModelRunRegularError(modelRun.name, -1,
                                           stdOutFilename, stdErrFilename)
            else:
                print "Model ran successfully (output saved to %s, std out"\
                " & std error to %s." % (absOutPath, absLogPath)
        print "Doing post-run tidyup:"
        modelRun.postRunCleanup()

        # Construct a modelResult
        mResult = ModelResult(modelRun.name, absOutPath)

        # Now attach appropriate Job meta info
        try:
            tSteps, simTime = getSimInfoFromFreqOutput(modelRun.outputPath)
        except ValueError:
            # For now, allow runs that didn't create a freq output
            tSteps, simTime = None, None
        # Perhaps functions on jobMetaInfo?
        # get provenance info
        # attach provenance info
        # get performance info
        # attach performance info
        mResult.jobMetaInfo = jobMetaInfo
        # Navigate to the model's base directory
        if modelRun.basePath != startDir:
            print "Restoring initial path '%s'" % (startDir)
            os.chdir(startDir)
        return mResult
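
The polling loop above scans every whitespace-separated token of the qstat output for a state letter, which can misfire if, for example, a job or user name is itself "R" or "E". Below is a minimal sketch of reading the state column directly, based only on the qstat output format shown in the comments above; the helper name query_pbs_state is hypothetical, and it uses subprocess.check_output (Python 2.7+) rather than os.popen. Treating an unknown job ID as "already finished" mirrors the loop's handling of the "Unknown" case.

import subprocess

def query_pbs_state(job_id):
    # Return the single-letter PBS state for job_id (e.g. "Q", "R", "E", "C"),
    # or None if qstat no longer knows the job, i.e. it has already finished
    # and left the queue.
    try:
        output = subprocess.check_output(["qstat", job_id],
                                         stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        # e.g. "qstat: Unknown Job Id 3506.tweedle"
        return None
    job_no = job_id.split(".")[0]
    for line in output.decode().splitlines():
        if line.startswith(job_no):
            # qstat columns: Job id, Name, User, Time Use, S, Queue;
            # the state letter is the second-to-last field.
            return line.split()[-2]
    return None

# Hypothetical usage:
#   state = query_pbs_state("3505.tweedle")
#   finished = state in (None, "E", "C")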