def get_container_info(job_id):
    """Return a flat list of containers across all attempts of the YARN
    application behind `job_id`, or None if any timeline-server call fails."""
    app_id = job_id.replace("job", "application")
    location = timeline_server['host']
    port = timeline_server['port']
    scheme = timeline_server['scheme']
    # Probe the application first so a missing app fails fast.
    path = '/ws/v1/applicationhistory/apps/{0}/'.format(app_id)
    if http_request(location, port, path, scheme=scheme) is None:
        return None
    path = '/ws/v1/applicationhistory/apps/{0}/appattempts/'.format(app_id)
    app_attempts = http_request(location, port, path, scheme=scheme)
    if app_attempts is None:
        return None
    container_attempt = []
    for attempt in app_attempts['appAttempt']:
        path = '/ws/v1/applicationhistory/apps/{0}/appattempts/{1}/containers'.format(
            app_id, attempt['appAttemptId'])
        json_resp_containers = http_request(location, port, path, scheme=scheme)
        if json_resp_containers is None:
            return None
        container_attempt.append(json_resp_containers['container'])
    # Flatten the per-attempt container lists into one list.
    return [container for containers in container_attempt for container in containers]

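# All collectors in this module share an `http_request` helper and a set of
# module-level config dicts (timeline_server, job_history_server,
# spark2_history_server, elastic, confluent_kafka_rest_server,
# resource_manager, name_node, plugin_name, tag_app_name, kafka_sink_name)
# defined elsewhere in the plugin. Below is a minimal sketch of the assumed
# http_request contract -- fetch a REST endpoint, return parsed JSON or None --
# not the plugin's actual implementation:
import logging
import time       # used throughout the collectors in this module
import traceback

import requests

logger = logging.getLogger(__name__)


def http_request(location, port, path, scheme='http', timeout=30):
    """Fetch scheme://location:port/path and return the parsed JSON body,
    or None on any connection, HTTP, or decoding error."""
    url = '{0}://{1}:{2}{3}'.format(scheme, location, port, path)
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json()
    except Exception:
        logger.debug('Request to %s failed => %s', url,
                     traceback.format_exc().splitlines()[-1])
        return None
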
def get_job_info(job_id, wfName, wfId, wfaId, wfaName):
    """Fetch job-level stats and counters for a MapReduce job from the job
    history server and return them as a single document, or None on failure."""
    location = job_history_server['host']
    port = job_history_server['port']
    path = "/ws/v1/history/mapreduce/jobs/{0}".format(job_id)
    json_resp = http_request(location, port, path, scheme=job_history_server['scheme'])
    if json_resp is None:
        return None
    job_info = json_resp['job']
    # Attach workflow context and document metadata.
    job_info['wfName'] = wfName
    job_info['wfId'] = wfId
    job_info['wfaId'] = wfaId
    job_info['wfaName'] = wfaName
    job_info['_plugin'] = plugin_name['oozie']
    job_info['_documentType'] = "jobStats"
    job_info['_tag_appName'] = tag_app_name['oozie']
    job_info['time'] = int(time.time())
    # Keep the raw millisecond averages before converting to seconds.
    job_info['avgReduceTimeMs'] = job_info['avgReduceTime']
    job_info['avgShuffleTimeMs'] = job_info['avgShuffleTime']
    job_info['avgMergeTimeMs'] = job_info['avgMergeTime']
    # Convert timestamps and averages from milliseconds to epoch seconds;
    # missing timestamps become -1.
    job_info['startTime'] = int(job_info['startTime'] / 1000) if job_info['startTime'] else -1
    job_info['finishTime'] = int(job_info['finishTime'] / 1000) if job_info['finishTime'] else -1
    job_info['endTime'] = job_info['finishTime']
    job_info['submitTime'] = int(job_info['submitTime'] / 1000) if job_info['submitTime'] else -1
    job_info['avgMapTime'] = int(job_info['avgMapTime'] / 1000)
    job_info['avgReduceTime'] = int(job_info['avgReduceTime'] / 1000)
    job_info['avgShuffleTime'] = int(job_info['avgShuffleTime'] / 1000)
    job_info['avgMergeTime'] = int(job_info['avgMergeTime'] / 1000)
    # Derived durations are None when either endpoint is missing.
    job_info['elapsedTime'] = (job_info['endTime'] - job_info['submitTime']
                               if job_info['endTime'] > 0 and job_info['submitTime'] > 0 else None)
    job_info['runTime'] = (job_info['endTime'] - job_info['startTime']
                           if job_info['endTime'] > 0 and job_info['startTime'] > 0 else None)
    job_info['schedulingDelay'] = (job_info['startTime'] - job_info['submitTime']
                                   if job_info['startTime'] > 0 and job_info['submitTime'] > 0 else None)
    job_info['jobId'] = job_info['id']
    # Merge the job counters into the same document.
    path_counters = "/ws/v1/history/mapreduce/jobs/{0}/counters".format(job_id)
    json_resp_counters = http_request(location, port, path_counters,
                                      scheme=job_history_server['scheme'])
    if json_resp_counters is None:
        return None
    if json_resp_counters['jobCounters'].get('counterGroup') is not None:
        for group in json_resp_counters['jobCounters']['counterGroup']:
            for counter in group["counter"]:
                base = convert_camelcase(counter["name"], "_")
                job_info[base + "Total"] = counter["totalCounterValue"]
                job_info[base + "Reduce"] = counter["reduceCounterValue"]
                job_info[base + "Map"] = counter["mapCounterValue"]
    return job_info

def get_job_counters(job_id):
    """Fetch MapReduce job counters and return them as a flat document keyed
    by camelCase counter names, or None on failure."""
    location = job_history_server['host']
    port = job_history_server['port']
    path = "/ws/v1/history/mapreduce/jobs/{0}/counters".format(job_id)
    json_resp = http_request(location, port, path, scheme=job_history_server['scheme'])
    if json_resp is None:
        return None
    job_counters_json = {
        '_plugin': plugin_name['oozie'],
        '_documentType': "jobStats",
        '_tag_appName': tag_app_name['oozie'],
        'time': int(time.time() * 1000),
    }
    for group in json_resp['jobCounters']['counterGroup']:
        for counter in group["counter"]:
            base = convert_camelcase(counter["name"], "_")
            job_counters_json[base + "Total"] = counter["totalCounterValue"]
            job_counters_json[base + "Reduce"] = counter["reduceCounterValue"]
            job_counters_json[base + "Map"] = counter["mapCounterValue"]
    return job_counters_json

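# `convert_camelcase` (used above and below) is assumed to turn counter names
# such as "FILE_BYTES_READ" into camelCase keys, splitting on the given
# separator. A plausible sketch under that assumption, not the plugin's
# actual implementation:
def convert_camelcase(name, separator):
    """convert_camelcase("FILE_BYTES_READ", "_") -> "fileBytesRead"."""
    parts = name.lower().split(separator)
    return parts[0] + ''.join(part.capitalize() for part in parts[1:])
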
def get_app_info(app_id, wfName, wfId, wfaId, wfaName):
    """Fetch application-level stats from the YARN timeline server and tag
    them with workflow context, or return None on failure."""
    location = timeline_server['host']
    port = timeline_server['port']
    path = '/ws/v1/applicationhistory/apps/{0}/'.format(app_id)
    app_info = http_request(location, port, path, scheme=timeline_server['scheme'])
    if app_info is None:
        return None
    app_info['wfName'] = wfName
    app_info['wfId'] = wfId
    app_info['wfaId'] = wfaId
    app_info['wfaName'] = wfaName
    app_info['_plugin'] = plugin_name['oozie']
    app_info['_documentType'] = "appStats"
    app_info['_tag_appName'] = tag_app_name['oozie']
    app_info['time'] = int(time.time() * 1000)
    # Convert times from milliseconds to epoch seconds.
    app_info['startedTime'] = int(app_info['startedTime'] / 1000)
    app_info['finishedTime'] = int(app_info['finishedTime'] / 1000)
    app_info['elapsedTime'] = int(app_info['elapsedTime'] / 1000)
    app_info['submittedTime'] = int(app_info['submittedTime'] / 1000)
    return app_info

def get_job_details(app, name, attempt_id):
    """Fetch per-job details for a Spark application from the Spark2 history
    server and normalise them into documents, or return None on failure."""
    location = spark2_history_server.get('host')
    port = spark2_history_server.get('port')
    if attempt_id == 0:
        path = '/api/v1/applications/{}/jobs'.format(app)
    else:
        path = '/api/v1/applications/{}/{}/jobs'.format(app, attempt_id)
    job_details = http_request(location, port, path,
                               scheme=spark2_history_server.get('scheme'))
    if job_details is None:
        return None
    for job in job_details:
        job['appId'] = app
        job['appAttemptId'] = attempt_id
        job['appName'] = name
        job['jobName'] = job.pop('name')
        job['_documentType'] = 'sparkJobs'
        job['_tag_appName'] = tag_app_name['spark']
        job['_plugin'] = plugin_name['spark']
        # The jobs endpoint exposes no separate start time, so submission
        # time doubles as start time and schedulingDelay is always 0.
        job['submitTime'] = convert_to_epoch(job['submissionTime'])
        job['startTime'] = convert_to_epoch(job['submissionTime'])
        job['endTime'] = convert_to_epoch(job['completionTime'])
        job['elapsedTime'] = job['endTime'] - job['submitTime']
        job['runTime'] = job['endTime'] - job['startTime']
        job['schedulingDelay'] = job['startTime'] - job['submitTime']
        job['time'] = int(time.time() * 1000)
    return job_details

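# `convert_to_epoch` is assumed to parse the Spark history server's timestamp
# strings (e.g. "2018-04-02T10:22:13.221GMT") into epoch seconds. A minimal
# sketch under that assumed format:
from datetime import datetime


def convert_to_epoch(timestamp):
    """Parse a Spark REST API timestamp into epoch seconds (UTC)."""
    dt = datetime.strptime(timestamp.replace('GMT', ''), '%Y-%m-%dT%H:%M:%S.%f')
    return int((dt - datetime(1970, 1, 1)).total_seconds())
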
def get_task_info(job_id, wfName, wfId, wfaId, wfaName):
    """Fetch per-task stats for a MapReduce job and return a list of task
    documents tagged with workflow context, or None on failure."""
    location = job_history_server['host']
    port = job_history_server['port']
    path = '/ws/v1/history/mapreduce/jobs/{0}'.format(job_id)
    json_resp_ts = http_request(location, port, path, scheme=job_history_server['scheme'])
    if json_resp_ts is None:
        return None
    path = '/ws/v1/history/mapreduce/jobs/{0}/tasks'.format(job_id)
    json_resp_tasks = http_request(location, port, path, scheme=job_history_server['scheme'])
    if json_resp_tasks is None:
        return None
    task_document_list = []
    for task in json_resp_tasks['tasks']['task']:
        task['wfName'] = wfName
        task['wfId'] = wfId
        task['wfaId'] = wfaId
        task['wfaName'] = wfaName
        task['_plugin'] = plugin_name['oozie']
        task['_documentType'] = "taskStats"
        task['jobId'] = job_id
        task['name'] = json_resp_ts['job']['name']
        task['_tag_appName'] = tag_app_name['oozie']
        task['taskId'] = task['id']
        task['time'] = int(time.time() * 1000)
        # Convert timestamps from milliseconds to epoch seconds. The tasks
        # endpoint has no submit time, so start time stands in for it.
        task['submitTime'] = int(task['startTime'] / 1000)
        task['startTime'] = int(task['startTime'] / 1000)
        task['finishTime'] = int(task['finishTime'] / 1000)
        task['endTime'] = task['finishTime']
        task['elapsedTime'] = int(task['elapsedTime'] / 1000)
        task['runTime'] = (task['endTime'] - task['startTime']
                           if task['endTime'] and task['startTime'] else None)
        task['schedulingDelay'] = (task['startTime'] - task['submitTime']
                                   if task['startTime'] and task['submitTime'] else None)
        task_document_list.append(task)
    return task_document_list

def get_stages(app, name, attempt_id):
    """Fetch stage-level metrics for a Spark application, flatten the
    accumulator updates, and return the stage documents, or None on failure."""
    location = spark2_history_server['host']
    port = spark2_history_server['port']
    scheme = spark2_history_server['scheme']
    if attempt_id == 0:
        path = '/api/v1/applications/{}/stages'.format(app)
    else:
        path = '/api/v1/applications/{}/{}/stages'.format(app, attempt_id)
    all_stages_json = http_request(location, port, path, scheme=scheme)
    if all_stages_json is None:
        return None
    #logger.debug("All stages: {0}".format(all_stages_json))

    def updates(d):
        d['appId'] = app
        d['appName'] = name
        d['appAttemptId'] = attempt_id
        d['stageAttemptId'] = d.pop('attemptId')
        d['_documentType'] = 'sparkStages'
        d['_tag_appName'] = tag_app_name['spark']
        d['_plugin'] = plugin_name['spark']
        d['time'] = int(time.time())
        # Promote dotted accumulator names such as
        # "internal.metrics.shuffle.read.remoteBlocksFetched" to top-level
        # camelCase keys.
        for m in d['accumulatorUpdates']:
            old_name = m['name']
            if '.' in old_name:
                parts = old_name.split(".")
                new_name = parts[2] + "".join(x[0].capitalize() + x[1:] for x in parts[3:])
                if isInt(m['value']):
                    d[new_name] = int(m['value'])
                elif isFloat(m['value']):
                    d[new_name] = float(m['value'])
                else:
                    d[new_name] = m['value']
            else:
                # TODO The metric name does not contain '.' and is repeated
                # multiple times.
                continue
        d.pop('accumulatorUpdates', None)
        if d['status'] == "COMPLETE" or d['status'] == "FAILED":
            # Fall back to completionTime when a stage never reports a
            # submission or first-task-launch time.
            d['submissionTime'] = (convert_to_epoch(d['submissionTime'])
                                   if 'submissionTime' in d
                                   else convert_to_epoch(d['completionTime']))
            d['firstTaskLaunchedTime'] = (convert_to_epoch(d['firstTaskLaunchedTime'])
                                          if 'firstTaskLaunchedTime' in d
                                          else convert_to_epoch(d['completionTime']))
            d['completionTime'] = convert_to_epoch(d['completionTime'])
            d['runTime'] = d['completionTime'] - d['firstTaskLaunchedTime']
            d['elapsedTime'] = d['completionTime'] - d['submissionTime']
            d['schedulingDelay'] = d['firstTaskLaunchedTime'] - d['submissionTime']
        d['executorRunTime'] = int(d['executorRunTime']) if 'executorRunTime' in d else 0
        d['stageName'] = d.pop('name')

    for d in all_stages_json:
        updates(d)
    return all_stages_json

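# `isInt` and `isFloat` (used when typing the accumulator values above) are
# assumed to be simple coercion checks; sketches:
def isInt(value):
    try:
        int(value)
        return True
    except (TypeError, ValueError):
        return False


def isFloat(value):
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        return False
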
def get_task_ids_by_job(job_id):
    """Return the list of task ids for a MapReduce job, or None on failure."""
    location = job_history_server['host']
    port = job_history_server['port']
    path = '/ws/v1/history/mapreduce/jobs/{0}/tasks'.format(job_id)
    json_resp_tasks = http_request(location, port, path, scheme=job_history_server['scheme'])
    if json_resp_tasks is None:
        return None
    return [task['id'] for task in json_resp_tasks['tasks']['task']]

def is_task_running(taskId):
    """Return True if the given Kafka Connect sink task reports RUNNING on
    any of the configured REST hosts."""
    task_status_path = "/connectors/{0}/tasks/{1}/status".format(kafka_sink_name, taskId)
    result = None
    for node in confluent_kafka_rest_server['hosts']:
        result = http_request(node, confluent_kafka_rest_server['port'],
                              path=task_status_path, scheme=elastic['scheme'])
        if result:
            break
    if result and 'state' in result:
        return result['state'].lower() == "running"
    return False

def get_stage_attempt_ids(app, app_attempt):
    """Return stage/attempt ids (with task counts) for all COMPLETE stages of
    a Spark application, or None on failure."""
    location = spark2_history_server['host']
    port = spark2_history_server['port']
    scheme = spark2_history_server['scheme']
    if app_attempt == 0:
        path = '/api/v1/applications/{}/stages'.format(app)
    else:
        path = '/api/v1/applications/{}/{}/stages'.format(app, app_attempt)
    all_stages_json = http_request(location, port, path, scheme=scheme)
    if all_stages_json is None:
        return None
    return [{'stageId': s['stageId'],
             'stageAttemptId': s['attemptId'],
             'numTasks': (s['numTasks'] if 'numTasks' in s
                          else s['numCompleteTasks'] + s['numFailedTasks'])}
            for s in all_stages_json
            if 'status' in s and s['status'] == "COMPLETE"]

def get_kafka_connector_sink_info():
    """Return the Kafka Connect sink configuration from the first responsive
    REST host, or None if no host answers."""
    config_path = "/connectors/{0}".format(kafka_sink_name)
    result = None
    for node in confluent_kafka_rest_server['hosts']:
        result = http_request(node, confluent_kafka_rest_server['port'],
                              path=config_path, scheme=elastic['scheme'])
        if result:
            break
    if result:
        logger.debug("Kafka sink info: {0}".format(result))
        return result
    logger.error("Failed to get active Kafka sink information")
    return None

def is_kafka_connector_active():
    """Return the list of deployed connectors from the first responsive REST
    host, or None if no host answers."""
    connectors_path = "/connectors"
    result = None
    for node in confluent_kafka_rest_server['hosts']:
        result = http_request(node, confluent_kafka_rest_server['port'],
                              path=connectors_path, scheme=elastic['scheme'])
        if result:
            break
    if result:
        logger.debug("Kafka connector active: {0}".format(result))
        return result
    logger.error("Failed to find an active Kafka connector")
    return None

def get_active_es_cluster_nodes():
    """Return the host names of all nodes in the Elasticsearch cluster,
    asking each configured host until one responds, or None if none do."""
    rest_nodes_path = '/_nodes'
    result = None
    for host in elastic['hosts']:
        result = http_request(host, elastic['port'],
                              path=rest_nodes_path, scheme=elastic['scheme'])
        logger.debug(result)
        if result:
            break
    if result:
        return [node['host'] for node in result['nodes'].values()]
    return None

def get_active_resource_manager(resource_manager_list):
    """Return the first ResourceManager in the list whose HA state is ACTIVE,
    or None if none is."""
    for rm in resource_manager_list:
        path = "/ws/v1/cluster/info"
        res = http_request(rm, resource_manager['port'], path,
                           scheme=resource_manager['scheme'])
        try:
            if res is None:
                continue
            if res['clusterInfo']['haState'] == 'ACTIVE':
                return rm
        except KeyError:
            continue
    return None

def get_active_nn(name_node_list):
    """Return the first NameNode in the list reporting state 'active' via its
    JMX NameNodeStatus bean, or None if none is."""
    for nn in name_node_list:
        path = "/jmx?qry=Hadoop:service=NameNode,name={}".format('NameNodeStatus')
        json_doc = http_request(nn, name_node['port'], path,
                                scheme=name_node['scheme'])
        if json_doc is None or not json_doc.get('beans'):
            continue
        if json_doc['beans'][0]['State'] == 'active':
            return nn
    return None

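# Both HA probes above follow the same pattern: poll each configured host and
# return the first that reports itself active. A hypothetical call site; the
# host names are illustrative placeholders, not real configuration:
def example_discover_active_masters():
    active_rm = get_active_resource_manager(['rm1.example.com', 'rm2.example.com'])
    active_nn = get_active_nn(['nn1.example.com', 'nn2.example.com'])
    if active_rm is None or active_nn is None:
        logger.error('No active ResourceManager or NameNode found')
    return active_rm, active_nn
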
def get_executors(app, name, attempt_id):
    """Fetch executor metrics for a Spark application, flatten the nested
    memory and log fields, and return the executor documents, or None."""
    location = spark2_history_server['host']
    port = spark2_history_server['port']
    scheme = spark2_history_server['scheme']
    if attempt_id == 0:
        path = '/api/v1/applications/{}/allexecutors'.format(app)
    else:
        path = '/api/v1/applications/{}/{}/allexecutors'.format(app, attempt_id)
    executors = http_request(location, port, path, scheme=scheme)
    if executors is None:
        return None

    def update(e):
        #logger.debug("The value of e: {0}".format(e))
        # Flatten nested memoryMetrics and executorLogs into top-level keys.
        if 'memoryMetrics' in e:
            for m in e['memoryMetrics']:
                e[m] = e['memoryMetrics'][m]
            e.pop('memoryMetrics')
        for m in e['executorLogs']:
            e['executorLogs' + m.capitalize()] = e['executorLogs'][m]
        e.pop('executorLogs')
        e['appId'] = app
        e['appAttemptId'] = attempt_id
        e['executorId'] = e.pop('id')
        e['_documentType'] = 'sparkExecutors'
        e['_tag_appName'] = tag_app_name['spark']
        e['_plugin'] = plugin_name['spark']
        if 'addTime' in e:
            e['addTime'] = convert_to_epoch(e['addTime'])
        e['time'] = int(time.time() * 1000)
        e['appName'] = name

    for e in executors:
        update(e)
    return executors

def get_tasks_per_stage(app, name, attempt_id):
    """Fetch the task list for every completed stage of a Spark application,
    flatten the task metrics, and return all task documents, or None."""
    location = spark2_history_server['host']
    port = spark2_history_server['port']
    scheme = spark2_history_server['scheme']
    result = []
    stages = get_stage_attempt_ids(app, attempt_id)
    if stages is None:
        return None
    for s in stages:
        if s['numTasks'] <= 0:
            continue
        if attempt_id == 0:
            path = '/api/v1/applications/{}/stages/{}/{}/taskList?offset=0&length={}'.format(
                app, s['stageId'], s['stageAttemptId'], s['numTasks'])
        else:
            path = '/api/v1/applications/{}/{}/stages/{}/{}/taskList?offset=0&length={}'.format(
                app, attempt_id, s['stageId'], s['stageAttemptId'], s['numTasks'])
        tasks_json = http_request(location, port, path, scheme=scheme)
        if tasks_json is None:
            return None

        def updates(task):
            task['appId'] = app
            task['appAttemptId'] = attempt_id
            task['appName'] = name
            task['stageAttemptId'] = s['stageAttemptId']
            task['stageId'] = s['stageId']
            task['sparkTaskId'] = int(task.pop('taskId'))
            if 'taskMetrics' in task:
                # Flatten nested metric groups, e.g. shuffleReadMetrics.recordsRead
                # becomes shuffleReadRecordsRead.
                metrics = task['taskMetrics']
                for m in metrics:
                    if isinstance(metrics[m], dict):
                        for k in metrics[m]:
                            new_key = m.replace("Metrics", "") + k[0].capitalize() + k[1:]
                            task[new_key] = metrics[m][k]
                    else:
                        task[m] = metrics[m]
                task.pop('taskMetrics')
            task.pop('accumulatorUpdates')
            task['_documentType'] = 'sparkTasks'
            task['_tag_appName'] = tag_app_name['spark']
            task['_plugin'] = plugin_name['spark']
            task['launchTime'] = convert_to_epoch(task['launchTime'])
            #if 'duration' in task:
            #    task['endTime'] = task['launchTime'] + int(task['duration']/1000)
            task['executorRunTime'] = task.get('executorRunTime', 0)
            task['endTime'] = task['launchTime'] + int(task['executorRunTime'] / 1000)
            task['runTime'] = task['endTime'] - task['launchTime']
            task['elapsedTime'] = task['endTime'] - task['launchTime']
            task['schedulingDelay'] = 0
            task['time'] = int(time.time() * 1000)

        for t in tasks_json:
            updates(t)
        result += tasks_json
    return result

def get_taskattempt_container_info(job_id, task_ids, wfName, wfId, wfaId, wfaName):
    """Fetch attempt-level stats (plus counters) for every task of a
    MapReduce job. Returns a list of per-task attempt-document lists, or
    None on failure."""
    try:
        if task_ids is None:
            return None
        location = job_history_server['host']
        port = job_history_server['port']
        #containers_list = get_container_info(job_id)
        containers_list = []
        task_attempt_document_job = []
        for task in task_ids:
            path = '/ws/v1/history/mapreduce/jobs/{0}/tasks/{1}/attempts'.format(job_id, task)
            json_resp_tasks = http_request(location, port, path,
                                           scheme=job_history_server['scheme'])
            if json_resp_tasks is None:
                return None
            task_attempt_document = []
            for task_attempt in json_resp_tasks['taskAttempts']['taskAttempt']:
                task_attempt['wfName'] = wfName
                task_attempt['wfId'] = wfId
                task_attempt['wfaId'] = wfaId
                task_attempt['wfaName'] = wfaName
                task_attempt['_plugin'] = plugin_name['oozie']
                task_attempt['_documentType'] = 'taskAttemptStat'
                task_attempt['_tag_appName'] = tag_app_name['oozie']
                task_attempt['jobId'] = job_id
                task_attempt['taskId'] = task
                task_attempt['time'] = int(time.time() * 1000)
                # Convert timestamps from milliseconds to epoch seconds;
                # missing values become -1. The attempts endpoint has no
                # submit time, so start time stands in for it.
                task_attempt['submitTime'] = (int(task_attempt['startTime'] / 1000)
                                              if task_attempt['startTime'] else -1)
                task_attempt['startTime'] = (int(task_attempt['startTime'] / 1000)
                                             if task_attempt['startTime'] else -1)
                task_attempt['finishTime'] = (int(task_attempt['finishTime'] / 1000)
                                              if task_attempt['finishTime'] else -1)
                task_attempt['endTime'] = task_attempt['finishTime']
                task_attempt['elapsedTime'] = (int(task_attempt['elapsedTime'] / 1000)
                                               if task_attempt['elapsedTime'] else -1)
                task_attempt['runTime'] = (task_attempt['endTime'] - task_attempt['startTime']
                                           if task_attempt['endTime'] > 0
                                           and task_attempt['startTime'] > 0 else -1)
                task_attempt['schedulingDelay'] = (task_attempt['startTime'] - task_attempt['submitTime']
                                                   if task_attempt['submitTime'] > 0
                                                   and task_attempt['startTime'] > 0 else -1)
                task_attempt['taskAttemptId'] = task_attempt['id']
                task_attempt['containerId'] = task_attempt.pop('assignedContainerId')
                for key in ('shuffleFinishTime', 'mergeFinishTime', 'elapsedShuffleTime',
                            'elapsedMergeTime', 'elapsedReduceTime'):
                    if key in task_attempt:
                        task_attempt[key] = int(task_attempt[key] / 1000)
                # Find the container in the container app list and merge in
                # its resource allocation.
                for container in containers_list:
                    if container['containerId'] == task_attempt['containerId']:
                        task_attempt['allocatedMB'] = container['allocatedMB']
                        task_attempt['allocatedVCores'] = container['allocatedVCores']
                        break
                # Merge the counters document.
                path = '/ws/v1/history/mapreduce/jobs/{0}/tasks/{1}/attempts/{2}/counters'.format(
                    job_id, task, task_attempt['taskAttemptId'])
                json_resp_counters = http_request(location, port, path,
                                                  scheme=job_history_server['scheme'])
                if json_resp_counters is None:
                    return None
                task_attempt_counter = {}
                for group in json_resp_counters['jobTaskAttemptCounters']['taskAttemptCounterGroup']:
                    for counter in group["counter"]:
                        task_attempt_counter[convert_camelcase(counter["name"], "_")] = counter["value"]
                task_attempt.update(task_attempt_counter)
                if task_attempt['nodeHttpAddress']:
                    task_attempt['nodeHttpAddress'] = task_attempt['nodeHttpAddress'].split(":")[0]
                task_attempt_document.append(task_attempt)
            task_attempt_document_job.append(task_attempt_document)
        return task_attempt_document_job
    except Exception:
        logger.debug('Unable to get task details => ' +
                     traceback.format_exc().splitlines()[-1])
        return None

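# A hedged sketch of how the MapReduce collectors above might compose for one
# Oozie workflow action: job-level stats, per-task stats, then per-attempt
# stats, accumulated into a flat document list. The helper name and workflow
# arguments are illustrative, not part of the plugin:
def collect_mapreduce_documents(job_id, wfName, wfId, wfaId, wfaName):
    documents = []
    job_info = get_job_info(job_id, wfName, wfId, wfaId, wfaName)
    if job_info:
        documents.append(job_info)
    tasks = get_task_info(job_id, wfName, wfId, wfaId, wfaName)
    if tasks:
        documents.extend(tasks)
    attempts = get_taskattempt_container_info(
        job_id, get_task_ids_by_job(job_id), wfName, wfId, wfaId, wfaName)
    if attempts:
        for per_task_attempts in attempts:
            documents.extend(per_task_attempts)
    return documents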