def get_job(self, jobid):
  """
  Look up a single YARN job by id via the Resource Manager.

  Accepts either a 'job_...' or 'application_...' id. Dispatches on the
  application type: SPARK apps and generic apps are wrapped directly;
  MAPREDUCE jobs are fetched from the MapReduce API while running and from
  the History Server once finished. KILLED apps short-circuit to a
  KilledYarnJob wrapper.

  Raises ApplicationNotRunning when the app is still only ACCEPTED.
  """
  try:
    # Normalize to the application id form expected by the RM API.
    jobid = jobid.replace('job', 'application')
    job = self.resource_manager_api.app(jobid)['app']

    if job['state'] == 'ACCEPTED':
      raise ApplicationNotRunning(jobid, job)
    elif job['state'] == 'KILLED':
      return KilledYarnJob(self.resource_manager_api, job)

    if job.get('applicationType') == 'SPARK':
      job = SparkJob(job, self.resource_manager_api)
    elif job.get('applicationType') == 'MAPREDUCE':
      # The MapReduce/History Server APIs use the 'job_...' id form.
      jobid = jobid.replace('application', 'job')
      if job['state'] in ('NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING'):
        resp = self.mapreduce_api.job(self.user, jobid)
        job = YarnJob(self.mapreduce_api, resp['job'])
      else:
        resp = self.history_server_api.job(self.user, jobid)
        job = YarnJob(self.history_server_api, resp['job'])
    else:
      job = Application(job, self.resource_manager_api)
  except ApplicationNotRunning:
    # Bare re-raise keeps the original traceback (unlike `raise e`) and is
    # valid in both Python 2 and 3.
    raise

  return job
def get_job(self, jobid):
  """
  Look up a single YARN job by id via the Resource Manager.

  Finished apps (finalStatus SUCCEEDED/FAILED/KILLED) are resolved through
  the Spark History Server, a KilledYarnJob wrapper, or the MapReduce
  History Server depending on type/state. Live MAPREDUCE apps go through
  the MapReduce proxy API; other live apps are wrapped as a plain
  Application.

  Raises:
    ApplicationNotRunning: when the app is still only ACCEPTED.
    JobExpired: when the RM lookup fails for a reason other than 404.
  """
  # Both id forms are needed: 'application_...' for the RM, 'job_...' for MR/JHS.
  job_id = jobid.replace('application', 'job')
  app_id = jobid.replace('job', 'application')
  try:
    app = self.resource_manager_api.app(app_id)['app']

    if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
      if app['applicationType'] == 'SPARK':
        job = SparkJob(app, rm_api=self.resource_manager_api, hs_api=self.spark_history_server_api)
      elif app['state'] == 'KILLED':
        job = KilledYarnJob(self.resource_manager_api, app)
      else:
        resp = self.history_server_api.job(self.user, job_id)
        job = YarnJob(self.history_server_api, resp['job'])
    else:
      if app['state'] == 'ACCEPTED':
        raise ApplicationNotRunning(app_id, app)
      # The MapReduce API only returns JSON when the application is in a RUNNING state
      elif app['state'] in ('NEW', 'SUBMITTED', 'RUNNING') and app['applicationType'] == 'MAPREDUCE':
        resp = self.mapreduce_api.job(self.user, job_id)
        job = YarnJob(self.mapreduce_api, resp['job'])
      else:
        job = Application(app, self.resource_manager_api)
  except RestException as e:
    if e.code == 404:
      # Job not found in RM so attempt to find job in History Server
      resp = self.history_server_api.job(self.user, job_id)
      job = YarnJob(self.history_server_api, resp['job'])
    else:
      raise JobExpired(app_id)

  return job
def get_job(self, jobid):
  """
  Look up a single YARN job by id via the Resource Manager.

  Finished apps are resolved through the Spark History Server, a
  KilledYarnJob wrapper (KILLED/FAILED states), or the Job History Server.
  Live MAPREDUCE apps go through the MapReduce proxy API; other live apps
  are wrapped as a plain Application.

  Raises:
    ApplicationNotRunning: when the app is still only ACCEPTED.
    PopupException: when the MR proxy returns a non-JSON response.
    JobExpired: when the RM lookup fails for a reason other than 404.
  """
  # Both id forms are needed: 'application_...' for the RM, 'job_...' for MR/JHS.
  job_id = jobid.replace('application', 'job')
  app_id = jobid.replace('job', 'application')
  try:
    app = self.resource_manager_api.app(app_id)['app']

    if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
      if app['applicationType'] == 'SPARK':
        job = SparkJob(app, rm_api=self.resource_manager_api, hs_api=self.spark_history_server_api)
      elif app['state'] in ('KILLED', 'FAILED'):
        job = KilledYarnJob(self.resource_manager_api, app)
      else:
        # Job succeeded, attempt to fetch from JHS
        job = self._get_job_from_history_server(job_id)
    else:
      if app['state'] == 'ACCEPTED':
        raise ApplicationNotRunning(app_id, app)
      # The MapReduce API only returns JSON when the application is in a RUNNING state
      elif app['state'] in ('NEW', 'SUBMITTED', 'RUNNING') and app['applicationType'] == 'MAPREDUCE':
        resp = self.mapreduce_api.job(self.user, job_id)
        if not isinstance(resp, dict):
          raise PopupException(_('Mapreduce Proxy API did not return JSON response, check if the job is running.'))
        job = YarnJob(self.mapreduce_api, resp['job'])
      else:
        job = Application(app, self.resource_manager_api)
  except RestException as e:
    if e.code == 404:
      # Job not found in RM so attempt to find job in JHS
      job = self._get_job_from_history_server(job_id)
    else:
      raise JobExpired(app_id)

  return job
def get_job(self, jobid):
  """
  Look up a single MapReduce job by id.

  Live jobs (NEW/SUBMITTED/ACCEPTED/RUNNING per the Resource Manager) are
  fetched from the MapReduce API; finished jobs from the History Server.

  Raises PopupException if the job cannot be found for any reason.
  """
  try:
    # RM lookup needs the 'application_...' id form.
    app_id = jobid.replace('job', 'application')
    job = self.resource_manager_api.app(app_id)['app']

    # MR/JHS lookups need the 'job_...' id form.
    mr_id = app_id.replace('application', 'job')
    if job['state'] in ('NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING'):
      resp = self.mapreduce_api.job(self.user, mr_id)
      job = YarnJob(self.mapreduce_api, resp['job'])
    else:
      resp = self.history_server_api.job(self.user, mr_id)
      job = YarnJob(self.history_server_api, resp['job'])
  except Exception as e:
    # Report the id the caller asked for, not an internally rewritten one.
    raise PopupException('Job %s could not be found: %s' % (jobid, e), detail=e)

  return job
def get_job(self, jobid):
  """
  Try first as if it was a running job, then as a finished job.

  Returns a YarnJob when the MapReduce API resolves the running job.

  NOTE(review): unlike the other variants, there is no History Server
  fallback here — when the lookup fails (or the id does not start with
  'application') this returns None. Confirm the fallback was not lost.
  """
  try:
    if jobid.startswith('application'):
      # The MapReduce API uses the 'job_...' id form.
      resp = self.mapreduce_api.job(self.user, jobid.replace('application', 'job'))
      return YarnJob(self.mapreduce_api, resp['job'])
  except Exception as e:
    LOG.info('Job %s not running: %s' % (jobid, e))
def get_job(self, jobid):
  """
  Look up a single MapReduce job by id via the Resource Manager.

  Live jobs are fetched from the MapReduce API; finished jobs from the
  History Server.

  Raises ApplicationNotRunning when the app is still only ACCEPTED.
  """
  # RM lookup needs the 'application_...' id form.
  app_id = jobid.replace('job', 'application')
  job = self.resource_manager_api.app(app_id)['app']

  if job['state'] == 'ACCEPTED':
    # No try/except wrapper needed: the original catch-and-reraise of
    # ApplicationNotRunning was a no-op that only truncated the traceback.
    raise ApplicationNotRunning(app_id, job)

  # MR id, assume 'applicationType': 'MAPREDUCE'
  mr_id = app_id.replace('application', 'job')
  if job['state'] in ('NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING'):
    resp = self.mapreduce_api.job(self.user, mr_id)
    job = YarnJob(self.mapreduce_api, resp['job'])
  else:
    resp = self.history_server_api.job(self.user, mr_id)
    job = YarnJob(self.history_server_api, resp['job'])

  return job
def _get_job_from_history_server(self, job_id):
  """Fetch a finished job from the Job History Server and wrap it as a YarnJob."""
  payload = self.history_server_api.job(self.user, job_id)
  return YarnJob(self.history_server_api, payload['job'])
class YarnApi(JobBrowserApi):
  """
  List all the jobs with Resource Manager API.
  Get running single job information with MapReduce API.
  Get finished single job information with History Server API.

  The trick is that we use appid when the job is running and jobid when it
  is finished. We also suppose that each app id has only one MR job id.
  e.g. job_1355791146953_0105, application_1355791146953_0105

  A better alternative might be to call the Resource Manager instead of
  relying on the type of job id. The perfect solution would be to have all
  this logic embedded
  """

  def __init__(self, user):
    self.user = user
    self.resource_manager_api = resource_manager_api.get_resource_manager()
    self.mapreduce_api = mapreduce_api.get_mapreduce_api()
    self.node_manager_api = node_manager_api.get_resource_manager_api()
    self.history_server_api = history_server_api.get_history_server_api()

  def get_job_link(self, job_id):
    # A job's "link" representation is the job object itself.
    return self.get_job(job_id)

  def get_jobs(self, user, **kwargs):
    """
    List applications from the Resource Manager, filtered by the
    'username', 'state' and 'text' entries of kwargs, then restricted to
    what `user` is allowed to see.
    """
    # Map UI state names to the RM 'finalStatus' filter values.
    state_filters = {
      'running': 'UNDEFINED',
      'completed': 'SUCCEEDED',
      'failed': 'FAILED',
      'killed': 'KILLED',
    }

    filters = {}
    if kwargs['username']:
      filters['user'] = kwargs['username']
    if kwargs['state'] and kwargs['state'] != 'all':
      filters['finalStatus'] = state_filters[kwargs['state']]

    resp = self.resource_manager_api.apps(**filters)
    if resp['apps']:
      jobs = [Application(app) for app in resp['apps']['app']]
    else:
      return []

    if kwargs['text']:
      text = kwargs['text'].lower()
      # Free-text search over name, id, user and queue.
      jobs = filter(
          lambda job: text in job.name.lower() or
                      text in job.id.lower() or
                      text in job.user.lower() or
                      text in job.queue.lower(),
          jobs)

    return self.filter_jobs(user, jobs)

  def filter_jobs(self, user, jobs, **kwargs):
    """Restrict `jobs` to those `user` may see (own jobs, unless sharing is on or user is a superuser)."""
    check_permission = not SHARE_JOBS.get() and not user.is_superuser

    return filter(
        lambda job: not check_permission or
                    user.is_superuser or
                    job.user == user.username,
        jobs)

  def get_job(self, jobid):
    """
    Try first as if it was a running job, then as a finished job.
    """
    try:
      if jobid.startswith('application'):
        # The MapReduce API uses the 'job_...' id form.
        resp = self.mapreduce_api.job(self.user, jobid.replace('application', 'job'))
        return YarnJob(self.mapreduce_api, resp['job'])
    except Exception as e:
      LOG.info('Job %s not running: %s' % (jobid, e))

    # Fall back to the History Server for finished jobs.
    jobid = jobid.replace('application', 'job')
    resp = self.history_server_api.job(self.user, jobid)
    return YarnJob(self.history_server_api, resp['job'])