Пример #1
0
    def __init__(self, rank=None, ATHENA_PROC_NUMBER=1, workingDir=None):
        """Set up the manager's bookkeeping state, logger and signal handlers.

        :param rank: value used to tag this manager's log messages.
        :param ATHENA_PROC_NUMBER: process count; stored after ``int()``
            conversion.
        :param workingDir: optional directory in which
            'EventServiceManager.log' is created. When falsy, the bare file
            name is handed to the logger instead.
        """
        # identity
        self.__rank = rank
        self.__name = "EventServerJobManager"
        self.__eventRangeChannelName = "EventRangeChannel"

        # event ranges and messaging
        self.__eventRanges = []
        self.__eventRangesStatus = {}
        self.__outputMessage = []
        self.__messageQueue = multiprocessing.Queue()
        self.__messageInQueue = multiprocessing.Queue()
        self.__messageThread = None

        # handles for the helper processes (filled in later)
        self.__TokenExtractorCmd = None
        self.__TokenExtractorProcess = None
        self.__athenaMPProcess = None
        self.__athenaMP_isReady = False
        self.__athenaMP_needEvents = 0
        self.__pollTimeout = 5
        self.__child_pid = None
        self.__child_cpuTime = {}

        # logger: the file lives in workingDir when one was supplied
        logName = 'EventServiceManager.log'
        logPath = os.path.join(workingDir, logName) if workingDir else logName
        self.__log = Logger.Logger(filename=logPath)

        self.__childProcs = []
        self.__isKilled = False

        # termination bookkeeping
        self.__waitTerminate = False
        self.__waitTerminateTime = 1800
        self.__startTerminateTime = None

        self.__noMoreEvents = False
        self.__insertedMessages = 0
        self.__ATHENA_PROC_NUMBER = int(ATHENA_PROC_NUMBER)
        self.__numOutputs = 0
        self.initSignalHandler()

        self.__childRetStatus = 0
        self.__retry = 0
        self.__errEvent = False

        # accounting
        self.__startTime = time.time()
        self.__readyForEventTime = None
        self.__endTime = None
        self.__startOSTimes = os.times()
        startMsg = "Rank %s: startOSTimes: %s" % (self.__rank,
                                                  self.__startOSTimes)
        self.__log.debug(startMsg)
        self.__endOSTimes = None
        self.__totalQueuedEvents = 0
        self.__totalProcessedEvents = 0
        self.__cpuConsumptionTime = 0
        self.__helperThread = None
Пример #2
0
    def __init__(self, globalWorkingDir, localWorkingDir, outputs=None, job=None, esJobManager=None, outputDir=None, rank=None, logger=None):
        """Initialise the thread state, create the per-rank log and run ``setup(job)``.

        :param globalWorkingDir: global working directory; its parent
            directory is exported as the 'PilotHomeDir' environment variable.
        :param localWorkingDir: local working directory; the log file is
            placed in its ``rank_<rank>`` sub-directory.
        :param outputs: stored as-is for later use.
        :param job: job description, stored and passed to ``setup``.
        :param esJobManager: job manager instance, stored as-is.
        :param outputDir: stored as-is.
        :param rank: rank used in the log directory name and log messages.
        :param logger: accepted but not used (see note below).
        """
        threading.Thread.__init__(self)
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__rank = rank

        # NOTE(review): the `logger` argument is never used. The original
        # guard was `if logger and False`, which can never be true, so a
        # per-rank 'Droid.log' logger is always created. Confirm whether that
        # switch was meant to be temporary.
        rankDirName = "rank_%s" % str(self.__rank)
        localDir = _abspath(self.__localWorkingDir)
        rankDir = _abspath(_join(localDir, rankDirName))
        logPath = os.path.join(rankDir, 'Droid.log')
        self.__tmpLog = Logger.Logger(filename=logPath)

        self.__job = job
        self.__esJobManager = esJobManager
        self.__stop = threading.Event()
        self.__isFinished = False
        globalDirMsg = "Rank %s: Global working dir: %s" % (self.__rank, self.__globalWorkingDir)
        self.__tmpLog.info(globalDirMsg)
        os.environ['PilotHomeDir'] = os.path.dirname(self.__globalWorkingDir)

        self.__jobId = None
        self.__copyOutputToGlobal = False
        self.__outputDir = outputDir

        self.__hostname = socket.getfqdn()

        self.__outputs = outputs
        self.__threadpool = None
        self.setup(job)
Пример #3
0
    def __init__(self,
                 globalWorkingDir,
                 localWorkingDir,
                 rank=None,
                 nonMPIMode=False,
                 reserveCores=0,
                 outputDir=None):
        """Initialise the thread, its requester, working directory and signal handlers.

        :param globalWorkingDir: global working directory; also used as the
            'PilotHomeDir' environment variable when that is not already set.
        :param localWorkingDir: local working directory, stored as-is.
        :param rank: passed to the requester; used as this object's rank only
            when ``nonMPIMode`` is true (otherwise the requester's
            ``getRank()`` result is used).
        :param nonMPIMode: when true, no signal handlers are installed.
        :param reserveCores: stored on ``self.reserveCores``.
        :param outputDir: stored as-is.
        """
        threading.Thread.__init__(self)
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__tmpLog = Logger.Logger(filename='Droid.log')
        self.__comm = Interaction.Requester(rank=rank,
                                            nonMPIMode=nonMPIMode,
                                            logger=self.__tmpLog)
        self.__esJobManager = None
        self.__isFinished = False
        if nonMPIMode:
            self.__rank = rank
        else:
            self.__rank = self.__comm.getRank()
        self.__tmpLog.info("Rank %s: Global working dir: %s" %
                           (self.__rank, self.__globalWorkingDir))
        # `in` replaces dict.has_key(), which no longer exists on Python 3;
        # the membership test behaves the same on Python 2.
        if 'PilotHomeDir' not in os.environ:
            os.environ['PilotHomeDir'] = self.__globalWorkingDir

        self.initWorkingDir()
        self.__tmpLog.info("Rank %s: Current working dir: %s" %
                           (self.__rank, self.__currentDir))

        # per-job state, populated later
        self.__jobId = None
        self.__startTimeOneJobDroid = None
        self.__cpuTimeOneJobDroid = None
        self.__poolFileCatalog = None
        self.__inputFiles = None
        self.__copyInputFiles = None
        self.__preSetup = None
        self.__postRun = None
        self.__ATHENA_PROC_NUMBER = 1
        self.__firstGetEventRanges = True
        self.__outputDir = outputDir

        self.__yodaToOS = False

        self.reserveCores = reserveCores
        self.__hostname = socket.getfqdn()

        self.__outputs = Queue()
        self.__jobMetrics = {}
        self.__stagerThread = None

        self.__stop = False

        if not nonMPIMode:
            # Route these signals to self.stop, in the same order as before.
            handledSignals = (signal.SIGTERM, signal.SIGQUIT, signal.SIGSEGV,
                              signal.SIGXCPU, signal.SIGUSR1, signal.SIGBUS)
            for signum in handledSignals:
                signal.signal(signum, self.stop)
Пример #4
0
 def __init__(self, messageQ, socketname, context, **kwds):
     """Create the thread and try to open the yampl server socket.

     :param messageQ: queue object, stored for later use.
     :param socketname: name passed to ``yampl.ServerSocket``.
     :param context: context passed to ``yampl.ServerSocket``.
     :param kwds: extra keyword arguments forwarded to
         ``threading.Thread.__init__``.

     A failure to create the socket is logged, not raised; in that case the
     server-socket attribute is left unset.
     """
     threading.Thread.__init__(self, **kwds)
     self.__log = Logger.Logger(filename='EventServiceManager.log')
     self.__messageQ = messageQ
     # NOTE(review): on some Python 3 versions threading.Thread has a private
     # `_stop` method of its own; this Event would shadow it. Left unchanged
     # because other methods of this class may read `self._stop` - confirm
     # before renaming.
     self._stop = threading.Event()
     try:
         self.__messageSrv = yampl.ServerSocket(socketname, context)
     except Exception:
         # Only real errors are swallowed here; KeyboardInterrupt and
         # SystemExit are allowed to propagate.
         self.__log.debug("Exception: failed to start yampl server socket: %s" % traceback.format_exc())
Пример #5
0
    def __init__(self, globalWorkingDir, localWorkingDir):
        """Create the requester and logger, initialise the working directory
        and install ``self.stop`` as the SIGTERM handler.

        :param globalWorkingDir: global working directory, stored and logged.
        :param localWorkingDir: local working directory, stored as-is.
        """
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__comm = Interaction.Requester()
        self.__tmpLog = Logger.Logger()
        self.__esJobManager = None
        self.__rank = self.__comm.getRank()

        globalDirMsg = "Rank %s: Global working dir: %s" % (
            self.__rank, self.__globalWorkingDir)
        self.__tmpLog.info(globalDirMsg)

        self.initWorkingDir()
        currentDirMsg = "Rank %s: Current working dir: %s" % (
            self.__rank, self.__currentDir)
        self.__tmpLog.info(currentDirMsg)

        # input-file state, populated later
        self.__poolFileCatalog = self.__inputFiles = self.__copyInputFiles = None
        signal.signal(signal.SIGTERM, self.stop)
Пример #6
0
    ]

# Minimal job description written out for the test run.
job = {
    'PandaID': '123',
    'jobsetID': '567',
}

# Binary mode is what pickle requires on Python 3 and is also valid on
# Python 2; the `with` blocks close the files even if pickle.dump raises.
with open('job_pickle.txt', 'wb') as f:
    pickle.dump(job, f)

with open('eventranges_pickle.txt', 'wb') as f:
    pickle.dump(er, f)

# get logger
tmpLog = Logger.Logger()


# Find out which MPI rank this process is.
comm = MPI.COMM_WORLD
mpirank = comm.Get_rank()

# Rank 0 runs Yoda; every other rank acts as a requester.
if mpirank==0:
    yoda = Yoda.Yoda()
    yoda.run()
else:
    snd = Interaction.Requester()
    tmpLog.debug("rank{0} sending req".format(mpirank))
    # Ask for a job first, then repeatedly request event ranges for it.
    tmpStat,jobData = snd.sendRequest('getJob',{'siteName':'TEST'})
    # NOTE(review): no exit condition for this loop is visible in this
    # excerpt - confirm how it terminates.
    while True:
        tmpStat,res = snd.sendRequest('getEventRanges',{'pandaID':jobData['PandaID'],
                                                        'jobsetID':jobData['jobsetID']})
Пример #7
0
    def setup(self, job):
        try:
            self.__jobId = job.get("JobId", None)
            self.__startTimeOneJobDroid = time.time()
            self.__cpuTimeOneJobDroid = os.times()
            self.__poolFileCatalog = job.get('PoolFileCatalog', None)
            self.__inputFiles = job.get('InputFiles', None)
            self.__copyInputFiles = job.get('CopyInputFiles', False)
            self.__preSetup = job.get('PreSetup', None)
            self.__postRun = job.get('PostRun', None)

            self.__yodaToOS = job.get('yodaToOS', False)

            self.__ATHENA_PROC_NUMBER = int(job.get('ATHENA_PROC_NUMBER', 1))
            self.__ATHENA_PROC_NUMBER -= self.reserveCores
            if self.__ATHENA_PROC_NUMBER < 0:
                self.__ATHENA_PROC_NUMBER = 1
            job["AthenaMPCmd"] = "export ATHENA_PROC_NUMBER=" + str(
                self.__ATHENA_PROC_NUMBER) + "; " + job["AthenaMPCmd"]
            self.__jobWorkingDir = job.get('GlobalWorkingDir', None)
            if self.__jobWorkingDir:
                self.__jobWorkingDir = os.path.join(self.__jobWorkingDir,
                                                    'rank_%s' % self.__rank)
                if not os.path.exists(self.__jobWorkingDir):
                    os.makedirs(self.__jobWorkingDir)
                os.chdir(self.__jobWorkingDir)
                logFile = os.path.join(self.__jobWorkingDir, 'Droid.log')
                logging.basicConfig(filename=logFile, level=logging.DEBUG)
                self.__tmpLog = Logger.Logger()

            if self.__copyInputFiles and self.__inputFiles is not None and self.__poolFileCatalog is not None:
                for inputFile in self.__inputFiles:
                    shutil.copy(inputFile, './')

                pfc_name = os.path.basename(self.__poolFileCatalog)
                pfc_name = os.path.join(os.getcwd(), pfc_name)
                pfc_name_back = pfc_name + ".back"
                shutil.copy2(self.__poolFileCatalog, pfc_name_back)
                with open(pfc_name, 'wt') as pfc_out:
                    with open(pfc_name_back, 'rt') as pfc_in:
                        for line in pfc_in:
                            pfc_out.write(
                                line.replace('HPCWORKINGDIR', os.getcwd()))

                job["AthenaMPCmd"] = job["AthenaMPCmd"].replace(
                    'HPCWORKINGDIR', os.getcwd())

            self.__esJobManager = EventServerJobManager(
                self.__rank,
                self.__ATHENA_PROC_NUMBER,
                workingDir=self.__jobWorkingDir)
            status, output = self.__esJobManager.preSetup(self.__preSetup)
            if status != 0:
                return False, output

            status, output = self.startStagerThread(job)
            if status != 0:
                self.__tmpLog.warning(
                    "Rank %s: failed to start stager thread(status: %s, output: %s)"
                    % (self.__rank, status, output))
                return False, output

            # self.__esJobManager.initMessageThread(socketname='EventService_EventRanges', context='local')
            # self.__esJobManager.initTokenExtractorProcess(job["TokenExtractCmd"])
            # self.__esJobManager.initAthenaMPProcess(job["AthenaMPCmd"])
            ret = self.__esJobManager.init(
                socketname='EventService_EventRanges',
                context='local',
                athenaMPCmd=job["AthenaMPCmd"],
                tokenExtractorCmd=job["TokenExtractCmd"])
            return True, None
        except:
            errMsg = "Failed to init EventServerJobManager: %s" % str(
                traceback.format_exc())
            self.__esJobManager.terminate()
            return False, errMsg