Example #1
def getAllApps(rmAddress):
	xTime = getTime()
	rm = ResourceManager(address=rmAddress, port=rmPort, timeout=30)
	# applications started since xTime, plus everything currently RUNNING
	res = rm.cluster_applications(started_time_begin=str(xTime)).data
	runningApps = rm.cluster_applications(state="RUNNING").data
	if res.get("apps") is None:
		return "null"
	if runningApps.get("apps") is None:
		return res.get("apps").get("app")
	return res.get("apps").get("app") + runningApps.get("apps").get("app")
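These lookups rely on the yarn-api-client response shape, where .data is the parsed JSON body and the application list sits under apps → app, with "apps" null when nothing matches. A defensive variant of the same lookup, as a sketch (safe_app_list is a hypothetical name; the keyword filters stand for whatever filters the method accepts):

def safe_app_list(rm, **filters):
    # "apps" is null in the JSON when no application matches the filters
    data = rm.cluster_applications(**filters).data
    apps = data.get("apps") or {}
    return apps.get("app") or []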
Example #2
def get24MinsApp(rmAddress):
	t = get0ClockTime()
	rm = ResourceManager(address=rmAddress, port=rmPort, timeout=30)
	res = rm.cluster_applications(started_time_begin=str(t)).data
	if res.get("apps") is None:
		return "null"
	# keep only today's runs of the daily batch job
	tmpList = []
	for i in res.get("apps").get("app"):
		if i.get("name") == "com.yunchen.batch.BatchOneDay":
			tmpList.append(i)
	return tmpList
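The filter loop above can collapse into a single list comprehension; an equivalent sketch of the function's tail:

	return [i for i in res.get("apps").get("app") if i.get("name") == "com.yunchen.batch.BatchOneDay"]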
Example #3
 def __init__(self, zaddr=ZABBIX_ADDR, zport=ZABBIX_PORT, iface=None):
     self._API_TYPE = {
         1: {
             'API_ID': 'clusterInfo',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/info',
             'KEY_PREFIX': 'Info'
         },
         2: {
             'API_ID': 'clusterMetrics',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/metrics',
             'KEY_PREFIX': 'Metrics'
         },
         3: {
             'API_ID': 'scheduler',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/scheduler',
             'KEY_PREFIX': 'Scheduler'
         },
         4: {
             'API_ID': 'apps',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/apps',
             'KEY_PREFIX': 'Apps'
         },
         5: {
             'API_ID': 'appStatInfo',
             'API_PREFIX': 'RM',
             'API_ADDRESS':
             'http://RMADDRESS:8088/ws/v1/cluster/appstatistics',
             'KEY_PREFIX': 'AppStatInfo'
         },
         6: {
             'API_ID': 'nodes',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/nodes',
             'KEY_PREFIX': 'Nodes'
         },
     }
     self._type = 1
     self._activerm = self._get_activerm()
     # self.apitype= apitype
     self.zaddr = zaddr
     self.zport = zport
     self.ret_result = []
     self.final_result_dict = {}
     self.zbserver = ZabbixSender(zaddr, zport)
     self._ip = self._getLocalIP(iface)
     self.rm = ResourceManager(address=self._activerm, timeout=10)
Example #4
    def configure(self):
        resource_manager_address = self.config.get('resource_manager_address')
        port = self.config.get('port')
        if port:
            self.resource_manager = ResourceManager(
                address=resource_manager_address, port=port)
            self.app_master = ApplicationMaster(
                address=resource_manager_address, port=port)
        else:
            self.resource_manager = ResourceManager(
                address=resource_manager_address)
            self.app_master = ApplicationMaster(
                address=resource_manager_address)

        self.application_ids = self.config.get('application_ids')
        self.application_status = self.config.get('application_status')
        self.application_tags = self.config.get('application_tags')
        self.application_names = self.config.get('application_names')
        self.application_status_list = []
Example #5
def main():
    YarnLog.writeLog('init logging')
    # init_logging()

    # RM = rm_active_standby()
    RM = ResourceManager(address=RM_HOST, port=RM_PORT)
    AM = ApplicationMaster(address=AM_HOST, port=AM_PORT)
    appsDict = update_applicatioins_map(RM)

    if YarnConfig.getConfig('MonitorSwitch', 'FinishedSwitch') == 'True':
        logging.info(
            '############################ Filter FINISHED Application and Start it #################################'
        )
        # restart Spark applications that have FINISHED
        # finishedswitch = True
        finishedAppsDict = filter_apps_state(appsDict, 'FINISHED')
        start_spark(finishedAppsDict)

    if YarnConfig.getConfig('MonitorSwitch', 'FailedSwitch') == 'True':
        logging.info(
            '############################ Filter FAILED Application and Start it #################################'
        )
        # restart Spark applications that FAILED
        failedAppsDict = filter_apps_state(appsDict, 'FAILED')
        start_spark(failedAppsDict)

    if YarnConfig.getConfig('MonitorSwitch', 'KilledSwitch') == 'True':
        logging.info(
            '############################ Filter KILLED Application and Start it #################################'
        )

        # restart Spark applications KILLED by the user
        killedAppsDict = filter_apps_state(appsDict, 'KILLED')
        start_spark(killedAppsDict)

    if YarnConfig.getConfig('MonitorSwitch', 'RunningSwitch') == 'True':
        logging.info(
            '############################ Filter RUNNING Application and Start it #################################'
        )

        # if a RUNNING application's current job has run for more than 2 minutes,
        # kill it; it will be restarted on the next monitoring pass
        runningAppsDict = filter_apps_state(appsDict, 'RUNNING')
        durationAppDict = apps_running_duration(runningAppsDict, AM)
        # start_spark(durationAppDict)
        kill_spark(durationAppDict)

    logging.info(
        '############################ Ending ###################################'
    )
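filter_apps_state and start_spark are defined elsewhere in the original project; a plausible sketch of the filter, assuming appsDict maps application ids to the app dicts returned by the ResourceManager (each carrying a 'state' field, as the other examples show):

def filter_apps_state(appsDict, state):
    # keep only the applications whose YARN state matches, e.g. 'FAILED'
    return {app_id: app for app_id, app in appsDict.items()
            if app.get('state') == state}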
Example #6
 def __init__(self, zaddr=ZABBIX_ADDR, zport=ZABBIX_PORT, iface=None):
     self._API_TYPE = {
         1: {
             'API_ID': 'clusterInfo',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/info',
             'KEY_PREFIX': 'Info'
         },
         2: {
             'API_ID': 'clusterMetrics',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/metrics',
             'KEY_PREFIX': 'Metrics'
         },
         3: {
             'API_ID': 'scheduler',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/scheduler',
             'KEY_PREFIX': 'Scheduler'
         },
         4: {
             'API_ID': 'apps',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/apps',
             'KEY_PREFIX': 'Apps'
         },
         5: {
             'API_ID': 'appStatInfo',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/appstatistics',
             'KEY_PREFIX': 'AppStatInfo'
         },
         6: {
             'API_ID': 'nodes',
             'API_PREFIX': 'RM',
             'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/nodes',
             'KEY_PREFIX': 'Nodes'
         },
     }
     self._type = 1
     self._activerm = self._get_activerm()
     # self.apitype= apitype
     self.zaddr = zaddr
     self.zport = zport
     self.ret_result = []
     self.final_result_dict = {}
     self.zbserver = ZabbixSender(zaddr, zport)
     self._ip = self._getLocalIP(iface)
     self.rm = ResourceManager(address=self._activerm, timeout=10)
Example #7
def rm_active_standby():
    active = ResourceManager(address=RM_HOST, port=RM_PORT)
    back = ResourceManager(address=RM_HOST_BAK, port=RM_PORT)

    activeHaState = active.cluster_information().data.get('clusterInfo').get(
        'haState')
    backHaState = back.cluster_information().data.get('clusterInfo').get(
        'haState')

    # state : ACTIVE, STANDBY
    if activeHaState == 'ACTIVE':
        logging.info("ResourceManager host = " + active.address + " is active")
        return active
    elif backHaState == 'ACTIVE':
        logging.info("ResourceManager host = " + back.address + " is active")
        return back
    else:
        logging.warning("No ResourceManager can be usered, Please check again")
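A sketch of a caller for the failover helper above. Since the function falls through with None when neither ResourceManager reports an ACTIVE haState, the caller guards for that (list_running_apps is a hypothetical name):

def list_running_apps():
    rm = rm_active_standby()
    if rm is None:
        return []
    apps = rm.cluster_applications(state='RUNNING').data.get('apps') or {}
    return apps.get('app') or []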
Example #8
    def run(self):
        analysis_timestamp = str(datetime.now())
        output_path = os.path.join(
            'daily_leader_boards', 'leader_board_' + str(self.jobs_year) +
            '-' + str(self.jobs_month).zfill(2) + '-' +
            str(self.jobs_day).zfill(2) + '.csv')

        rm = ResourceManager(configuration.yarn_resource_managers)
        metrics = rm.cluster_metrics()
        cluster_vcores_total = metrics.data['clusterMetrics'][
            'totalVirtualCores']
        cluster_daily_vcore_seconds = int(cluster_vcores_total * 60 * 60 * 24)

        cluster_memory_total_mb = metrics.data['clusterMetrics']['totalMB']
        cluster_daily_megabyte_memory_seconds = int(cluster_memory_total_mb *
                                                    60 * 60 * 24)

        begin_date = datetime(int(self.jobs_year), int(self.jobs_month),
                              int(self.jobs_day))
        end_date = begin_date + timedelta(1)
        begin_ms = str(int(begin_date.timestamp() * 1000))
        end_ms = str(int(end_date.timestamp() * 1000))

        # filter out jobs that started after the end of the analyzed day
        apps = rm.cluster_applications(
            # finished_time_begin=begin_ms,
            started_time_end=end_ms)
        applist = apps.data['apps']['app']
        total_vcore_seconds = 0
        total_mb_seconds = 0
        sum_elapsed_time_ms = 0
        overall_started_time_ms = 9999999999999
        overall_finished_time_ms = 0
        total_yarn_apps = 0

        users = {}

        app_file = 'app_lists/apps_' + str(self.jobs_year) \
                   + '-' + str(self.jobs_month).zfill(2) \
                   + '-' + str(self.jobs_day).zfill(2) + '.csv'

        apps_df = pd.DataFrame(applist)
        apps_df.to_csv(app_file)

        for app in applist:

            begin_ms_int = int(begin_ms)
            end_ms_int = int(end_ms)
            started_time = app['startedTime']
            finished_time = app['finishedTime']
            elapsed_time = app['elapsedTime']

            # disregard apps that haven't ever or yet consumed any resources
            if app['state'] not in ['FINISHED', 'FAILED', 'KILLED', 'RUNNING']:
                continue

            # disregard apps that finished before the beginning of the analyzed day
            if 0 < finished_time < begin_ms_int:
                continue

            # for scenario where job began and ended in the same day
            percent_within_day = 1.0

            # scenario where job began before the beginning of the day and ended before the end of the day
            if started_time < begin_ms_int < finished_time < end_ms_int:
                percent_within_day = (finished_time -
                                      begin_ms_int) / elapsed_time

            # scenario where job began before the beginning of the day and continued beyond the end of the day
            if started_time < begin_ms_int and (finished_time == 0
                                                or finished_time > end_ms_int):
                percent_within_day = 86400000 / elapsed_time

            # scenario where job began before the end of the day and continued beyond the end of the day
            if begin_ms_int < started_time < end_ms_int \
                    and (finished_time == 0 or end_ms_int < finished_time):
                percent_within_day = (end_ms_int - started_time) / elapsed_time

            weighted_app_vcore_seconds = int(app['vcoreSeconds'] *
                                             percent_within_day)
            weighted_app_memory_seconds = int(app['memorySeconds'] *
                                              percent_within_day)

            user = users.setdefault(
                app['user'], {
                    'user_first_task_started_time_ms': 9999999999999,
                    'last_task_finished_time_ms': 0
                })
            total_vcore_seconds += weighted_app_vcore_seconds
            total_mb_seconds += weighted_app_memory_seconds

            user['user_first_task_started_time_ms'] = min(
                app['startedTime'], user['user_first_task_started_time_ms'])
            user['last_task_finished_time_ms'] = max(
                app['finishedTime'], user['last_task_finished_time_ms'])

            overall_started_time_ms = min(app['startedTime'],
                                          overall_started_time_ms)
            overall_finished_time_ms = max(app['finishedTime'],
                                           overall_finished_time_ms)

            sum_elapsed_time_ms += app['elapsedTime']
            total_yarn_apps += 1

            user.setdefault('total_vcore_seconds', 0)
            user['total_vcore_seconds'] += weighted_app_vcore_seconds

            user.setdefault('total_MB_seconds', 0)
            user['total_MB_seconds'] += weighted_app_memory_seconds

        header = [
            'jobs_year', 'jobs_month', 'jobs_day',
            'cluster_daily_vcore_seconds',
            'cluster_daily_megabyte_memory_seconds', 'user',
            'used_vcore_seconds', 'percent_used_of_all_used_vcore_seconds',
            'percent_used_of_total_cluster_vcore_seconds', 'used_MB_seconds',
            'percent_used_of_all_used_MB_seconds',
            'percent_used_of_total_cluster_MB_seconds',
            'user_first_task_started_time', 'user_last_task_finished_time'
        ]

        table = []

        for user in users:

            # set last_task_finished_time to None if timestamp == 0 representing that the task hasn't finished yet
            if int(users[user]['last_task_finished_time_ms']) == 0:
                last_task_finished_time_string = ''
            else:
                last_task_finished_time_string = \
                    datetime.fromtimestamp(users[user]['last_task_finished_time_ms'] / 1000.0)\
                        .strftime('%Y-%m-%d %H:%M')

            row = [
                self.jobs_year,
                self.jobs_month,
                self.jobs_day,
                cluster_daily_vcore_seconds,
                cluster_daily_megabyte_memory_seconds,
                user,
                round(users[user]['total_vcore_seconds'], 0),
                round(
                    100 * users[user]['total_vcore_seconds'] /
                    total_vcore_seconds, 2),
                round(
                    100 * users[user]['total_vcore_seconds'] /
                    cluster_daily_vcore_seconds, 2),
                round(users[user]['total_MB_seconds'], 0),
                round(100 * users[user]['total_MB_seconds'] / total_mb_seconds,
                      2),
                round(
                    100 * users[user]['total_MB_seconds'] /
                    cluster_daily_megabyte_memory_seconds, 2),
                datetime.fromtimestamp(
                    users[user]['user_first_task_started_time_ms'] /
                    1000.0).strftime('%Y-%m-%d %H:%M'),
                last_task_finished_time_string,
            ]

            table.append(row)

        df = pd.DataFrame(table, columns=header)
        df = df.sort_values(by='used_MB_seconds', ascending=False)

        print()
        print('analysis timestamp: ' + analysis_timestamp)
        # print('functional account:', job_user)
        print('jobs date: ' + begin_date.strftime('%Y-%m-%d'))
        print('----------------------')
        print('count of yarn apps: ' + str(total_yarn_apps))
        print(
            'overall daily jobs started time ',
            datetime.fromtimestamp(overall_started_time_ms /
                                   1000.0).strftime('%Y-%m-%d %H:%M'))
        print(
            'overall daily jobs finished time',
            datetime.fromtimestamp(overall_finished_time_ms /
                                   1000.0).strftime('%Y-%m-%d %H:%M'))
        print()

        print(tabulate(df, headers='keys', showindex=False))
        df.to_csv(output_path, index=False)
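To sanity-check the proration branches above: a job that started 6 hours before the analyzed day and finished 18 hours into it has elapsed_time = 24 h, so the first branch gives percent_within_day = (finished_time - begin_ms_int) / elapsed_time = 18/24 = 0.75, and three quarters of its vcoreSeconds and memorySeconds are attributed to the day.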
Example #9
import os
import sys
import time
import datetime
import urllib2
from yarn_api_client import ResourceManager
reload(sys)
if __name__ == '__main__':
    Url = "http://172.16.11.225:9091/action/receiver.do"
    endtime = datetime.datetime.now()
    starttime = endtime-datetime.timedelta(days=7)
    
    print starttime, endtime
    starttimestamp = int(time.mktime(starttime.timetuple())*1000)
    endtimestamp = int(time.mktime(endtime.timetuple())*1000)
    print starttimestamp, endtimestamp
    try:  # query YARN for the application list
        monitor = ResourceManager("172.16.11.209", 8088, 30)
        out = monitor.cluster_applications(None, None, None, None, None, None, None, str(starttimestamp), str(endtimestamp))
    except:  # fall back to the standby ResourceManager
        monitor = ResourceManager("172.16.11.208", 8088, 30)
        out = monitor.cluster_applications(None, None, None, None, None, None, None, str(starttimestamp), str(endtimestamp))
    if out.data['apps'] is None:
        print 'error'
        exit()
    applicationlist = out.data['apps']['app']
    excutorMap = {}  # total resource consumption per user
    for applicationinfo in applicationlist:
        user = applicationinfo['user']
        vcoreSeconds = int(applicationinfo['vcoreSeconds'])
        if excutorMap.has_key(user):
            excutorMap[user] = excutorMap[user] + vcoreSeconds
        else:
            excutorMap[user] = vcoreSeconds
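Counting positions against the keyword signature shown in Example #13, the two timestamps above land on finished_time_begin and finished_time_end; with keywords the same call reads as this sketch:

    out = monitor.cluster_applications(finished_time_begin=str(starttimestamp),
                                       finished_time_end=str(endtimestamp))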
Example #10
def get_state(application_id):
    rm = ResourceManager([config.yarn_url])
    response = rm.cluster_application(application_id).data
    app = response['app']
    state = app['state']
    return state
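A sketch of building on get_state to block until an application reaches a terminal YARN state; the helper name and the 30-second interval are illustrative choices, not from the source:

import time

TERMINAL_STATES = {'FINISHED', 'FAILED', 'KILLED'}

def wait_for_completion(application_id, interval=30):
    # poll the ResourceManager until the application stops running
    while True:
        state = get_state(application_id)
        if state in TERMINAL_STATES:
            return state
        time.sleep(interval)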
Example #11
class HadoopJobPoller:

    _NAME_ = "Mapr Hadoop Job Poller"

    def __init__(self, config):
        logger.info('Initialise {}'.format(self.get_name()))
        self.config = config
        self.configure()
        self.result = []

    def load_config(self, config):
        logger.debug('Loading config: {}'.format(config))
        self.config = config
        self.configure()

    def get_name(self):
        return HadoopJobPoller._NAME_

    def configure(self):
        resource_manager_address = self.config.get('resource_manager_address')
        port = self.config.get('port')
        if port:
            self.resource_manager = ResourceManager(
                address=resource_manager_address, port=port)
            self.app_master = ApplicationMaster(
                address=resource_manager_address, port=port)
        else:
            self.resource_manager = ResourceManager(
                address=resource_manager_address)
            self.app_master = ApplicationMaster(
                address=resource_manager_address)

        self.application_ids = self.config.get('application_ids')
        self.application_status = self.config.get('application_status')
        self.application_tags = self.config.get('application_tags')
        self.application_names = self.config.get('application_names')
        self.application_status_list = []

    def __update_result(self, result=None):
        # avoid the shared-mutable-default pitfall
        if result is None:
            result = {}
        result.update({'time': time.time()})
        self.result.append(result)

    def poll(self):
        logger.info("Starting {} poll".format(self.get_name()))
        try:
            self.__application_details()
            success_status = {
                "status": "COMPLETED",
                "status_message": "Hadoop Job poll completed successfully",
                "applications_status": self.application_status_list
            }
            logger.info("Successfully completed {} poll".format(
                self.get_name()))
            return self.result, success_status

        except Exception as e:
            logger.error("Exception in {} poll :{}".format(
                self.get_name(), str(e)))
            exception_status = {
                "status": "EXCEPTION",
                "status_message": str(e)
            }

            return self.result, exception_status

    def __application_details(self):
        self.cluster_id = self.resource_manager.cluster_information().data.get(
            'clusterInfo').get('id')
        app_list = self.resource_manager.cluster_applications().data.get(
            'apps')

        if app_list:
            for app in app_list.get('app'):

                if app.get('state').lower() == 'running':
                    jobs = (self.app_master.jobs(
                        app.get('id')).data.get('jobs').get('job'))
                    result_job_list = []
                    for job in jobs:
                        task = self.app_master.job_tasks(
                            app.get('id'), job.get('id')).data
                        job.update(task)
                        result_job_list.append(job)
                    app.update({"jobs": result_job_list})

                # Filter applications by name
                if (self.application_names
                        is not None) and (app.get('name')
                                          not in self.application_names):
                    continue

                # Filter applications by id
                if (self.application_ids
                        is not None) and (app.get('id')
                                          not in self.application_ids):
                    continue

                # Filter applications by status
                if (self.application_status is not None) and (
                        app.get('state').lower() != self.application_status):
                    continue

                # Filter applications by tag
                if (self.application_tags
                        is not None) and (app.get('applicationTags')
                                          not in self.application_tags):
                    continue
                app_status = {
                    'application_id': app.get('id'),
                    'application_name': app.get('name'),
                    'status': app.get('state')
                }
                self.application_status_list.append(app_status)
                self.__update_result(app)
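A hypothetical driver for the poller above; the config keys mirror the ones configure() reads, and the values are placeholders:

config = {
    'resource_manager_address': 'rm.example.com',
    'port': 8088,
    'application_ids': None,        # None disables the corresponding filter
    'application_status': 'running',
    'application_tags': None,
    'application_names': None,
}
poller = HadoopJobPoller(config)
results, status = poller.poll()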
Example #12
class ZabbixHadoop:
    def __init__(self, zaddr=ZABBIX_ADDR, zport=ZABBIX_PORT, iface=None):
        self._API_TYPE = {
            1: {
                'API_ID': 'clusterInfo',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/info',
                'KEY_PREFIX': 'Info'
            },
            2: {
                'API_ID': 'clusterMetrics',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/metrics',
                'KEY_PREFIX': 'Metrics'
            },
            3: {
                'API_ID': 'scheduler',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/scheduler',
                'KEY_PREFIX': 'Scheduler'
            },
            4: {
                'API_ID': 'apps',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/apps',
                'KEY_PREFIX': 'Apps'
            },
            5: {
                'API_ID': 'appStatInfo',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/appstatistics',
                'KEY_PREFIX': 'AppStatInfo'
            },
            6: {
                'API_ID': 'nodes',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/nodes',
                'KEY_PREFIX': 'Nodes'
            },
        }
        self._type = 1
        self._activerm = self._get_activerm()
        # self.apitype= apitype
        self.zaddr = zaddr
        self.zport = zport
        self.ret_result = []
        self.final_result_dict = {}
        self.zbserver = ZabbixSender(zaddr, zport)
        self._ip = self._getLocalIP(iface)
        self.rm = ResourceManager(address=self._activerm, timeout=10)

    def _getLocalIP(self, iface):
        for i in interfaces():
            if i == iface:
                return ifaddresses(i)[2][0]['addr']

    def collect_app_stats(self, state_list=None, type_list=None):
        self._type = 5
        self.final_result_dict = {}
        # forward both filter lists to the appstatistics endpoint
        self.ret_result = self.rm.cluster_application_statistics(state_list=state_list, application_type_list=type_list).data[
            self._API_TYPE[self._type]['API_ID']
        ]['statItem']

        for i in self.ret_result:
            self.final_result_dict[i['state']] = i['count']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_app_metric(self, state=None, final_status=None,
                           user=None, queue=None, limit=None,
                           started_time_begin=None, started_time_end=None,
                           finished_time_begin=None, finished_time_end=None):
        self._type = 4
        self.final_result_dict = {}
        # forward the filter arguments to the apps endpoint
        self.ret_result = self.rm.cluster_applications(state=state, final_status=final_status,
                                                       user=user, queue=queue, limit=limit,
                                                       started_time_begin=started_time_begin, started_time_end=started_time_end,
                                                       finished_time_begin=finished_time_begin, finished_time_end=finished_time_end).data[
            self._API_TYPE[self._type]['API_ID']
        ]['app']
        for i in self.ret_result:
            if i['finalStatus'] == u'FAILED' or i['finalStatus'] == u'KILLED':
                if self.final_result_dict.has_key(i['finalStatus']):
                    self.final_result_dict[i['finalStatus']] = '%s, %s:%s:%s' % (self.final_result_dict[i['finalStatus']], i['user'], i['name'], i['queue'])
                else:
                    self.final_result_dict[i['finalStatus']] = '%s:%s:%s' % (i['user'], i['name'], i['queue'])
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_cluster_metrics(self):
        self._type = 2
        self.final_result_dict = {}
        self.ret_result = self.rm.cluster_metrics().data[
            self._API_TYPE[self._type]['API_ID']
        ]
        # use float division so the usage ratios are not truncated under Python 2
        self.final_result_dict['mem_usage'] = float(self.ret_result['allocatedMB']) / self.ret_result['totalMB']
        self.final_result_dict['vcore_usage'] = float(self.ret_result['allocatedVirtualCores']) / self.ret_result['totalVirtualCores']
        self.final_result_dict['unhealthyNodes'] = self.ret_result['unhealthyNodes']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_scheduler_metrics(self):
        self._type = 3
        self.final_result_dict = {}
        self.ret_result = self.rm.cluster_scheduler().data[
            self._API_TYPE[self._type]['API_ID']
        ]['schedulerInfo']
        self.final_result_dict['root_used_capacity'] = self.ret_result['usedCapacity']
        for index, queue in enumerate(self.ret_result['queues']['queue']):
            self.final_result_dict['queue' + str(index) + '_load'] = queue['usedCapacity']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def _send_zabbix(self):
        packet = ZabbixPacket()
        for k, v in self.final_result_dict.iteritems():
            packet.add(self._API_TYPE[self._type]['API_PREFIX'] + '_' + self._ip, self._API_TYPE[self._type]['KEY_PREFIX'] + '[' + k + ']', v)
        self.zbserver.send(packet)
        print self.zbserver.status

    def _get_activerm(self):
        for addr in RM_ADDR:
            ret_val = requests.get(self._API_TYPE[self._type]['API_ADDRESS'].replace('RMADDRESS', addr))
            if ret_val.status_code == 200:
                json_val = ret_val.json()[self._API_TYPE[self._type]['API_ID']]
                if json_val['haState'] == 'ACTIVE' and json_val['state'] == 'STARTED':
                    return addr
Example #13
class ZabbixHadoop:
    def __init__(self, zaddr=ZABBIX_ADDR, zport=ZABBIX_PORT, iface=None):
        self._API_TYPE = {
            1: {
                'API_ID': 'clusterInfo',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/info',
                'KEY_PREFIX': 'Info'
            },
            2: {
                'API_ID': 'clusterMetrics',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/metrics',
                'KEY_PREFIX': 'Metrics'
            },
            3: {
                'API_ID': 'scheduler',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/scheduler',
                'KEY_PREFIX': 'Scheduler'
            },
            4: {
                'API_ID': 'apps',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/apps',
                'KEY_PREFIX': 'Apps'
            },
            5: {
                'API_ID': 'appStatInfo',
                'API_PREFIX': 'RM',
                'API_ADDRESS':
                'http://RMADDRESS:8088/ws/v1/cluster/appstatistics',
                'KEY_PREFIX': 'AppStatInfo'
            },
            6: {
                'API_ID': 'nodes',
                'API_PREFIX': 'RM',
                'API_ADDRESS': 'http://RMADDRESS:8088/ws/v1/cluster/nodes',
                'KEY_PREFIX': 'Nodes'
            },
        }
        self._type = 1
        self._activerm = self._get_activerm()
        # self.apitype= apitype
        self.zaddr = zaddr
        self.zport = zport
        self.ret_result = []
        self.final_result_dict = {}
        self.zbserver = ZabbixSender(zaddr, zport)
        self._ip = self._getLocalIP(iface)
        self.rm = ResourceManager(address=self._activerm, timeout=10)

    def _getLocalIP(self, iface):
        for i in interfaces():
            if i == iface:
                return ifaddresses(i)[2][0]['addr']

    def collect_app_stats(self, state_list=None, type_list=None):
        self._type = 5
        self.final_result_dict = {}
        # forward both filter lists to the appstatistics endpoint
        self.ret_result = self.rm.cluster_application_statistics(
            state_list=state_list, application_type_list=type_list).data[
                self._API_TYPE[self._type]['API_ID']]['statItem']

        for i in self.ret_result:
            self.final_result_dict[i['state']] = i['count']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_app_metric(self,
                           state=None,
                           final_status=None,
                           user=None,
                           queue=None,
                           limit=None,
                           started_time_begin=None,
                           started_time_end=None,
                           finished_time_begin=None,
                           finished_time_end=None):
        self._type = 4
        self.final_result_dict = {}
        # forward the filter arguments to the apps endpoint
        self.ret_result = self.rm.cluster_applications(
            state=state,
            final_status=final_status,
            user=user,
            queue=queue,
            limit=limit,
            started_time_begin=started_time_begin,
            started_time_end=started_time_end,
            finished_time_begin=finished_time_begin,
            finished_time_end=finished_time_end).data[
                self._API_TYPE[self._type]['API_ID']]['app']
        for i in self.ret_result:
            if i['finalStatus'] == u'FAILED' or i['finalStatus'] == u'KILLED':
                if self.final_result_dict.has_key(i['finalStatus']):
                    self.final_result_dict[
                        i['finalStatus']] = '%s, %s:%s:%s' % (
                            self.final_result_dict[i['finalStatus']],
                            i['user'], i['name'], i['queue'])
                else:
                    self.final_result_dict[i['finalStatus']] = '%s:%s:%s' % (
                        i['user'], i['name'], i['queue'])
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_cluster_metrics(self):
        self._type = 2
        self.final_result_dict = {}
        self.ret_result = self.rm.cluster_metrics().data[self._API_TYPE[
            self._type]['API_ID']]
        # use float division so the usage ratios are not truncated under Python 2
        self.final_result_dict['mem_usage'] = float(
            self.ret_result['allocatedMB']) / self.ret_result['totalMB']
        self.final_result_dict['vcore_usage'] = float(
            self.ret_result['allocatedVirtualCores']) / self.ret_result['totalVirtualCores']
        self.final_result_dict['unhealthyNodes'] = self.ret_result[
            'unhealthyNodes']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def collect_scheduler_metrics(self):
        self._type = 3
        self.final_result_dict = {}
        self.ret_result = self.rm.cluster_scheduler().data[self._API_TYPE[
            self._type]['API_ID']]['schedulerInfo']
        self.final_result_dict['root_used_capacity'] = self.ret_result[
            'usedCapacity']
        for index, queue in enumerate(self.ret_result['queues']['queue']):
            self.final_result_dict['queue' + str(index) +
                                   '_load'] = queue['usedCapacity']
        if len(self.final_result_dict) != 0:
            self._send_zabbix()

    def _send_zabbix(self):
        packet = ZabbixPacket()
        for k, v in self.final_result_dict.iteritems():
            packet.add(
                self._API_TYPE[self._type]['API_PREFIX'] + '_' + self._ip,
                self._API_TYPE[self._type]['KEY_PREFIX'] + '[' + k + ']', v)
        self.zbserver.send(packet)
        print self.zbserver.status

    def _get_activerm(self):
        for addr in RM_ADDR:
            ret_val = requests.get(
                self._API_TYPE[self._type]['API_ADDRESS'].replace(
                    'RMADDRESS', addr))
            if ret_val.status_code == 200:
                json_val = ret_val.json()[self._API_TYPE[self._type]['API_ID']]
                if json_val['haState'] == 'ACTIVE' and json_val[
                        'state'] == 'STARTED':
                    return addr
Example #14
# File name: cpu_monitor.py
# Description:
# 1. Monitor Huawei YARN resources
# 2. Monitor disk space on hosts 119 and 120
# Input tables:
# Output tables:
# Author: hyn
# Created: 20191023
# Change log:
# Modified:
# ***************************************************************************
# Invocation: python cpu_monitor.py
# ***************************************************************************

import os
import sys
import time
import json
import datetime

result_list = []

from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager

#rm =ResourceManager(address='10.93.171.97',port='8088')
#rm =ResourceManager(service_endpoints='10.93.171.97',port='8088')
rm = ResourceManager(service_endpoints='10.93.171.97:8088')
print(rm.cluster_applications().data)
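The two commented-out lines above suggest the constructor signature changed across yarn-api-client releases: older versions took address/port keywords, newer ones take a service_endpoints list of full URLs. A version-tolerant sketch (make_rm is a hypothetical helper, not from the source):

def make_rm(host, port=8088, timeout=30):
    endpoint = 'http://%s:%s' % (host, port)
    try:
        # newer yarn-api-client releases expect a list of full endpoints
        return ResourceManager(service_endpoints=[endpoint], timeout=timeout)
    except TypeError:
        # older releases still use address/port keywords
        return ResourceManager(address=host, port=port, timeout=timeout)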
Example #15
# Author: hyn
# Created: 20200917
# Change log:
# Modified:
# ***************************************************************************
# Invocation: python yarn_monitor.py
# ***************************************************************************

import os
import sys
import json
import time

from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager

rm = ResourceManager(service_endpoints=['http://172.19.168.100:8088', 'http://172.19.168.4:8088'])
# print rm.cluster_information().data
#
# ApplicationMaster()
#
# NodeManager.

# filter for the important jobs
ats = 'ats'
thritf = 'Thrift'
dis = 'dis'

# 3 hours ago
# run_time = 10800

# 24 hours ago
Example #16
def get_info(application_id):
    rm = ResourceManager([config.yarn_url])
    response = rm.cluster_application(application_id).data
    app = response['app']
    return app
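A small usage sketch pairing get_info with field names that appear elsewhere in these examples ('name', 'user', 'state', 'finalStatus'); the application id is a placeholder:

app = get_info('application_1600000000000_0001')  # placeholder id
print('%s by %s: %s/%s' % (app['name'], app['user'], app['state'], app['finalStatus']))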