Exemplo n.º 1
0
 def monitorJobTracker(self):
   """Notify the registered idle handler when the cluster is detected as idle.

   Meant to be called periodically. If __isIdle() is true and an idle
   JobTracker handler is set, an info message is logged and the handler's
   handleIdleJobTracker() is invoked. Any exception raised during the check
   or the callback is caught and reported through the debug log.
   """
   try:
     # Nothing to do unless the cluster is idle...
     if not self.__isIdle():
       return
     # ...and somebody registered a callback to be told about it.
     if not self.__idleJobTrackerHandler:
       return
     self.__log.info('Detected cluster as idle. Calling registered callback handler.')
     self.__idleJobTrackerHandler.handleIdleJobTracker()
   except:
     # Best effort: a failed monitoring pass is only logged.
     self.__log.debug('Exception while monitoring job tracker. %s'
                      % get_exception_string())
Exemplo n.º 2
0
 def monitorJobTracker(self):
   """Notify the registered idle handler when the cluster is detected as idle.

   Meant to be called periodically. If __isIdle() is true and an idle
   JobTracker handler is set, an info message is logged and the handler's
   handleIdleJobTracker() is invoked. Any exception raised during the check
   or the callback is caught and reported through the debug log.
   """
   try:
     # Nothing to do unless the cluster is idle...
     if not self.__isIdle():
       return
     # ...and somebody registered a callback to be told about it.
     if not self.__idleJobTrackerHandler:
       return
     self.__log.info('Detected cluster as idle. Calling registered callback handler.')
     self.__idleJobTrackerHandler.handleIdleJobTracker()
   except:
     # Best effort: a failed monitoring pass is only logged.
     self.__log.debug('Exception while monitoring job tracker. %s'
                      % get_exception_string())
Exemplo n.º 3
0
 def getJobsStatus(self):
   """Return the status of all jobs run on the HOD allocated hadoop cluster.

   The job list is only queried when __isCompatibleHadoopVersion() accepts
   the version descriptor major '0' / minor '16'. In that case a status
   command is created with the option '-list all', started and waited for.
   If it exits with code 0, every line of its output is handed to
   __extractJobStatus() and each non-None result is collected.

   Any exception is caught and logged at debug level; whatever was collected
   up to that point (possibly nothing) is returned.
   """
   statuses = []
   try:
     requiredVersion = {'major': '0', 'minor': '16'}
     if not self.__isCompatibleHadoopVersion(requiredVersion):
       return statuses

     listCommand = self.__initStatusCommand(option='-list all')
     listCommand.start()
     listCommand.wait()
     listCommand.join()
     if listCommand.exit_code() != 0:
       return statuses

     # Append one entry at a time so that results gathered before a failure
     # are still returned to the caller.
     for outputLine in listCommand.output():
       parsedStatus = self.__extractJobStatus(outputLine)
       if parsedStatus is None:
         continue
       statuses.append(parsedStatus)
   except:
     self.__log.debug('Exception while getting job statuses. %s'
                      % get_exception_string())
   return statuses
Exemplo n.º 4
0
 def getJobsStatus(self):
   """Return the status of all jobs run on the HOD allocated hadoop cluster.

   The job list is only queried when __isCompatibleHadoopVersion() accepts
   the version descriptor major '0' / minor '16'. In that case a status
   command is created with the option '-list all', started and waited for.
   If it exits with code 0, every line of its output is handed to
   __extractJobStatus() and each non-None result is collected.

   Any exception is caught and logged at debug level; whatever was collected
   up to that point (possibly nothing) is returned.
   """
   statuses = []
   try:
     requiredVersion = {'major': '0', 'minor': '16'}
     if not self.__isCompatibleHadoopVersion(requiredVersion):
       return statuses

     listCommand = self.__initStatusCommand(option='-list all')
     listCommand.start()
     listCommand.wait()
     listCommand.join()
     if listCommand.exit_code() != 0:
       return statuses

     # Append one entry at a time so that results gathered before a failure
     # are still returned to the caller.
     for outputLine in listCommand.output():
       parsedStatus = self.__extractJobStatus(outputLine)
       if parsedStatus is None:
         continue
       statuses.append(parsedStatus)
   except:
     self.__log.debug('Exception while getting job statuses. %s'
                      % get_exception_string())
   return statuses
Exemplo n.º 5
0
     opList = self.__check_operation(operation)
     if self.__opCode == 0:
       if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)):
          self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \
                        self.__userState.get_state_file())
          self.__opCode = 1
          return self.__opCode
       getattr(self, "_op_%s" % opList[0])(opList)
   except HodInterruptException, h:
     self.__log.critical("op: %s failed because of a process interrupt." \
                                                               % operation)
     self.__opCode = HOD_INTERRUPTED_CODE
   except:
     self.__log.critical("op: %s failed: %s" % (operation,
                         get_exception_error_string()))
     self.__log.debug(get_exception_string())
   
   self.__cleanup()
   
   self.__log.debug("return code: %s" % self.__opCode)
   
   return self.__opCode
 
 def script(self):
   errorFlag = False
   errorMsgs = []
   scriptRet = 0 # return from the script, if run
   
   script = self.__cfg['hod']['script']
   nodes = self.__cfg['hod']['nodecount']
   clusterDir = self.__cfg['hod']['clusterdir']
Exemplo n.º 6
0
            if self.__opCode == 0:
                if not self.__userState.checkStateFile(CLUSTER_DATA_FILE,
                                                       (os.R_OK, )):
                    self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \
                                  self.__userState.get_state_file())
                    self.__opCode = 1
                    return self.__opCode
                getattr(self, "_op_%s" % opList[0])(opList)
        except HodInterruptException, h:
            self.__log.critical("op: %s failed because of a process interrupt." \
                                                                      % operation)
            self.__opCode = HOD_INTERRUPTED_CODE
        except:
            self.__log.critical("op: %s failed: %s" %
                                (operation, get_exception_error_string()))
            self.__log.debug(get_exception_string())

        self.__cleanup()

        self.__log.debug("return code: %s" % self.__opCode)

        return self.__opCode

    def script(self):
        errorFlag = False
        errorMsgs = []
        scriptRet = 0  # return from the script, if run

        script = self.__cfg['hod']['script']
        nodes = self.__cfg['hod']['nodecount']
        clusterDir = self.__cfg['hod']['clusterdir']