def _get_status(self, _silent = False):
    """
    Get job status for the Hadoop job.

    State information from Yarn and from our own status file is
    consolidated as follows:

    1. If our status file indicates the job is in a final state, use that
       state.
    2. If the Yarn application state is FAILED or UNKNOWN, or our status
       file produced no status, use the Yarn application state (mapped to
       a job state).
    3. Otherwise, use the state from our status file.

    `_silent` is forwarded to the status-file loads; the Yarn query itself
    is always made with silent = True.
    """
    # Our own status file is consulted first.
    own_status_path = self._exec_dir + '/status'
    own_status = self._load_file_and_parse(
        own_status_path,
        self._parse_status_file,
        silent = _silent,
        test_url=False)

    # A final state recorded by us wins outright.
    if self._is_final_state(own_status):
        return own_status

    # Ask Yarn; an empty answer is treated as 'UNKNOWN'.
    yarn_report = _HadoopExecutionEnvironment.get_yarn_application_state(
        self.environment, self.app_id, silent = True)
    if yarn_report:
        yarn_state = yarn_report['DistributedFinalState']
    else:
        yarn_state = 'UNKNOWN'

    # Yarn has nothing alarming to say and we do have a status: trust ours.
    if own_status is not None and yarn_state not in ('FAILED', 'UNKNOWN'):
        return own_status

    # Load the status file at <user home>/dato_distributed/<appid>/cmd_exec_dir/status.
    # The parsed result is discarded here; presumably the call is made for
    # its side effects (e.g. reporting when not silent) -- TODO confirm.
    hdfs_home = _HadoopExecutionEnvironment._get_user_hdfs_home_dir(self.environment)
    am_status_path = '%s/%s/%s/cmd_exec_dir/status' % (
        hdfs_home, 'dato_distributed', self.app_id)
    self._load_file_and_parse(
        am_status_path,
        self._parse_status_file,
        silent = _silent,
        test_url=True)

    # Translate the Yarn application state into our job state; a state
    # missing from this table yields None.
    job_state_by_yarn_state = {
        'KILLED': 'Canceled',
        'UNDEFINED': 'Pending',
        'FAILED' : 'Failed',
        'SUCCEEDED': 'Completed',
        'UNKNOWN': 'Unknown'
    }
    return job_state_by_yarn_state.get(yarn_state)
def _get_job_state(self, app_id, silent = False):
    '''
    Fetch the Yarn application state for the given application id.

    The lookup is delegated to
    _HadoopExecutionEnvironment.get_yarn_application_state and is always
    made with silent = True; the `silent` parameter of this method is
    accepted but not forwarded.

    Returns whatever the lookup returned, provided it is truthy.
    Raises RuntimeError when the lookup returns nothing.
    '''
    states = _HadoopExecutionEnvironment.get_yarn_application_state(
        self, app_id, silent = True)

    if states:
        return states

    raise RuntimeError('Cannot get application status from Yarn. '
                       'Please check if Yarn is in healthy state.')
def _cancel(self):
    '''
    Ask Yarn to cancel the application behind this job.

    When the cancel request does not report success, the job's `_status`
    is set to 'Unknown'. Nothing is returned.
    '''
    cancelled = _HadoopExecutionEnvironment.cancel_yarn_application(
        self.environment, self.app_id, silent = False)

    if cancelled:
        return

    # The outcome of the cancel request is not known, so record that.
    self._status = 'Unknown'
def _cancel_job(self, app_id, silent = False):
    '''
    Cancel the Yarn application with the given id.

    Delegates to _HadoopExecutionEnvironment.cancel_yarn_application,
    forwarding `silent`, and returns its result unchanged.
    '''
    outcome = _HadoopExecutionEnvironment.cancel_yarn_application(
        self, app_id, silent = silent)
    return outcome
def _prepare_job_files(self, job):
    '''
    Prepare all job files and upload them to HDFS.

    The work is delegated to _HadoopExecutionEnvironment.prepare_job_files;
    its result is not propagated, so this method returns None.
    '''
    _HadoopExecutionEnvironment.prepare_job_files(
        self, job)
def _create_job_home_dir(self, job_name):
    '''
    Create the home directory for the named job.

    Delegates to _HadoopExecutionEnvironment.create_job_home_dir and
    returns its result unchanged.
    '''
    job_home = _HadoopExecutionEnvironment.create_job_home_dir(self, job_name)
    return job_home
def _submit_job(self, job_working_dir, num_workers, silent = False):
    '''
    Submit the job whose files live in `job_working_dir`.

    Delegates to _HadoopExecutionEnvironment.submit_job and returns its
    result unchanged. `silent` is passed positionally; `num_workers` is
    accepted but not forwarded to the delegate.
    '''
    submission = _HadoopExecutionEnvironment.submit_job(
        self, job_working_dir, silent)
    return submission