def dn_stop(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from hdfs_hdfs " \
          "where ip='%s' and type=0 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status == "RUNNING":
        result = run_services.run_bigdata_services(
            datanode_stop_cmd, 'Datanode Stopped', 'Error Stopping Datanode')
        return run_services.confirm_stop(result, "hdfs_hdfs", id, web_port,
                                         rpyc_port)
    else:
        return '{"success": 1, "msg": ["Datanode Already Stopped!!!"]}'

def nn_start(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from hdfs_hdfs " \
          "where ip='%s' and type=1 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status != "RUNNING":
        result = run_services.run_bigdata_services(
            namenode_start_cmd, 'Namenode Started', 'Error Starting Namenode')
        update_namenode_info()
        return run_services.confirm_start(result, "hdfs_hdfs", id, web_port,
                                          rpyc_port)
    else:
        return '{"success": 1, "msg": ["Namenode Already Running!!!"]}'

def es_start(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from elastic_search_elastic_search " \
          "where ip='%s' and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status != "RUNNING":
        # Pick the start command that matches the init system managing ES.
        es_process_name = get_es_process_name()
        if es_process_name == "systemd":
            es_start_cmd = es_start_systemd_cmd
        elif es_process_name == "sysv_init":
            es_start_cmd = es_start_sysvinit_cmd
        else:
            es_start_cmd = es_start_bin_cmd
        result = run_services.run_bigdata_services(
            es_start_cmd, 'ES Started', 'Error Starting ES')
        return run_services.confirm_start(result, "elastic_search_elastic_search",
                                          id, web_port, rpyc_port)
    else:
        return '{"success": 1, "msg": ["ES Already Running!!!"]}'

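# A minimal sketch of how get_es_process_name() (used in es_start above) could
# distinguish the three init styles. The probing order, the unit/script names,
# and the helper name below are assumptions for illustration, not this
# project's actual implementation.
def _detect_es_init_system_sketch():
    """Return "systemd", "sysv_init", or "bin" (hypothetical logic)."""
    import os.path
    import subprocess
    # systemctl reports status 0 only when a systemd unit exists and is active.
    if subprocess.call("systemctl status elasticsearch >/dev/null 2>&1",
                       shell=True) == 0:
        return "systemd"
    # A SysV init script conventionally lives under /etc/init.d/.
    if os.path.exists("/etc/init.d/elasticsearch"):
        return "sysv_init"
    # Otherwise assume the bundled bin/elasticsearch launcher is used.
    return "bin"
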
def sps_stop(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from spark_spark " \
          "where ip='%s' and type=0 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status == "RUNNING":
        result = run_services.run_bigdata_services(
            stop_spark_slave_cmd, 'Spark Slave Stopped',
            'Error Stopping Spark Slave')
        return run_services.confirm_stop(result, "spark_spark", id, web_port,
                                         rpyc_port)
    else:
        return '{"success": 1, "msg": ["Spark Slave Already Stopped!!!"]}'

def nm_stop(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from yarn_yarn " \
          "where ip='%s' and type=0 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status == "RUNNING":
        result = run_services.run_bigdata_services(
            nm_stop_cmd, 'Node Manager Stopped', 'Error Stopping Node Manager')
        update_rm_info()
        return run_services.confirm_stop(result, "yarn_yarn", id, web_port,
                                         rpyc_port)
    else:
        return '{"success": 1, "msg": ["Node Manager Already Stopped!!!"]}'

def spm_start(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from spark_spark " \
          "where ip='%s' and type=1 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status != "RUNNING":
        result = run_services.run_bigdata_services(
            start_spark_master_cmd, 'Spark Master Started',
            'Error Starting Spark Master')
        return run_services.confirm_start(result, "spark_spark", id, web_port,
                                          rpyc_port)
    else:
        return '{"success": 1, "msg": ["Spark Master Already Running!!!"]}'

def master_stop(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from hbase_hbase " \
          "where ip='%s' and type=1 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status == "RUNNING":
        result = run_services.run_bigdata_services(
            hmaster_stop_cmd, 'HMaster Stopped', 'Error Stopping HBase Master')
        update_hbase_master()
        return run_services.confirm_stop(result, "hbase_hbase", id, web_port,
                                         rpyc_port)
    else:
        return '{"success": 1, "msg": ["HMaster Already Stopped!!!"]}'

def rm_start(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from yarn_yarn " \
          "where ip='%s' and type=1 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status != "RUNNING":
        result = run_services.run_bigdata_services(
            rm_start_cmd, 'Resource Manager Started',
            'Error Starting Resource Manager')
        update_rm_info()
        return run_services.confirm_start(result, "yarn_yarn", id, web_port,
                                          rpyc_port)
    else:
        return '{"success": 1, "msg": ["Resource Manager Already Running!!!"]}'

def regionserver_start(cluster_id):
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from hbase_hbase " \
          "where ip='%s' and type=0 and cluster_id=%d" % (ip, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    if status != "RUNNING":
        result = run_services.run_bigdata_services(
            hregionserver_start_cmd, 'Regionserver Started',
            'Error Starting Regionserver')
        update_hbase_master()
        return run_services.confirm_start(result, "hbase_hbase", id, web_port,
                                          rpyc_port)
    else:
        return '{"success": 1, "msg": ["Regionserver Already Running!!!"]}'

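# The start/stop handlers above all share one shape: look up this host's row,
# check its status, run a command, then confirm the transition. A minimal
# sketch of a shared helper that could factor out that pattern follows; the
# name _service_action and its parameters are hypothetical, not part of this
# module, and it leans on the same run_services calls the handlers use.
def _service_action(cluster_id, table, node_type, action, cmd, ok_msg, err_msg,
                    post_hook=None):
    """Run a start or stop command for this host's row in `table` (sketch)."""
    ip = get_system_ip()
    sql = "select id, status, web_port, rpyc_port from %s " \
          "where ip='%s' and type=%d and cluster_id=%d" % (table, ip,
                                                           node_type, cluster_id)
    rs = run_services.get_service_status(sql)
    id, status, web_port, rpyc_port = rs[0], rs[1], rs[2], rs[3]
    # Nothing to do if the service is already in the requested state.
    already = (status == "RUNNING") if action == "start" else (status != "RUNNING")
    if already:
        return '{"success": 1, "msg": ["Service already in requested state!!!"]}'
    result = run_services.run_bigdata_services(cmd, ok_msg, err_msg)
    if post_hook is not None:
        post_hook()  # e.g. update_rm_info or update_hbase_master
    confirm = (run_services.confirm_start if action == "start"
               else run_services.confirm_stop)
    return confirm(result, table, id, web_port, rpyc_port)

# Usage sketch: dn_stop(cluster_id) would reduce to
#   _service_action(cluster_id, "hdfs_hdfs", 0, "stop", datanode_stop_cmd,
#                   'Datanode Stopped', 'Error Stopping Datanode')
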
from Custom_JMX.hbase import get_hbase
from Custom_JMX.spark import get_spark
from Custom_JMX.elasticsearch import get_elastic_search
from System_Info.hostname_info import get_system_ip
from Postgres_connection.connection import get_postgres_connection
from Linux_service_management.system_statistics import write_system_statistics_history
import custom_app
from custom_requests import request

service_list = []
my_service_process = []
system_ip = get_system_ip()


def home():
    '''
    :return: Urls used for all services
    '''
    path = "Urls used for all services:\n\
'/kill/service/': 'to kill running service'\n\
'/hadoop/': 'hadoop urls'\n\
'/yarn/': 'yarn urls'\n\
'/hbase/': 'hbase urls'\n\
'/spark/': 'spark urls'\n\
'/config': 'config urls'\n\
'/hdfs/': 'hdfs urls'\n\
'/es/': 'es urls'"
    return path

def get_hbase():
    global final_hbase_data
    final_hbase_data = {}
    try:
        while True:
            global updated_at
            updated_at = int(str(time.time()).split(".")[0])
            live_server_info = []
            dead_server_info = []
            global cluster_id
            result = subprocess.check_output([get_hbase_status],
                                             stderr=subprocess.STDOUT,
                                             shell=True).decode("utf-8")
            result = result.strip().split('\n')
            # `hbase status` output starts at the "active master" line;
            # everything before it is shell/log noise.
            index = [i for i, s in enumerate(result) if 'active master' in s][0]
            result = result[index:]
            active = (result[0]).strip().split(":", 1)[0]
            if active != "active master":
                # No active master: mark every master row in this cluster down.
                cluster_id = get_hbase_db_info(get_system_ip())[1]
                sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                         WHERE type={3} and cluster_id={4}
                         RETURNING id;""".format("SHUTDOWN", 0, updated_at, 1,
                                                 cluster_id)
                execute_hbase_sql(sql)
                final_hbase_data = {'active': 0}
            else:
                active_master = ((result[0]).strip().split(
                    ":", 1)[1]).strip().split()[0]
                active_master_ip = get_ip(active_master.split(":")[0])
                active_id = get_hbase_db_info(active_master_ip)[0]
                cluster_id = get_hbase_db_info(active_master_ip)[1]
                sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                         WHERE id={3} RETURNING id;""".format(
                    "RUNNING", 1, updated_at, active_id)
                execute_hbase_sql(sql)
                no_of_backup_master = int((result[1]).split()[0])
                if no_of_backup_master > 0:
                    backup_master = (result[2]).strip().split()[0]
                    backup_master_ip = get_ip(backup_master.split(":")[0])
                    sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                             where type=1 and cluster_id={3} and ip='{4}'
                             RETURNING id;""".format("RUNNING", 0, updated_at,
                                                     cluster_id, backup_master_ip)
                    execute_hbase_sql(sql)
                    no_of_live_servers = int(result[3].split()[0])
                    i = 4
                else:
                    # No backup master: mark every other master row down.
                    sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                             where type=1 and cluster_id={3} and ip <> '{4}'
                             RETURNING id;""".format("SHUTDOWN", 0, updated_at,
                                                     cluster_id, active_master_ip)
                    execute_hbase_sql(sql)
                    no_of_live_servers = int(result[2].split()[0])
                    i = 3
                no_of_threadpool = 0
                if no_of_live_servers > 0:
                    no_of_threadpool = int(no_of_live_servers / 10) + 1
                # Live servers are listed as alternating host and metrics lines.
                while no_of_live_servers > 0:
                    key_index = i
                    value_index = i + 1
                    key = (result[key_index]).strip().split()[0]
                    value = (result[value_index]).strip()
                    value = value + ', ip=%s:%s' % (get_ip(key.split(":")[0]),
                                                    key.split(":")[1])
                    live_server_info.append(value)
                    i = i + 2
                    no_of_live_servers = no_of_live_servers - 1
                if live_server_info:
                    pool = ThreadPool(no_of_threadpool)
                    pool.map(hbase_liveserver_update, live_server_info, 10)
                    pool.close()
                no_of_dead_servers = int((result[i]).split()[0])
                if no_of_dead_servers > 0:
                    while no_of_dead_servers > 0:
                        dead_server = (result[i + 1]).strip().split(",")[0]
                        dead_server_ip = get_ip(dead_server)
                        sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                                 where type=0 and cluster_id={3} and ip='{4}'
                                 RETURNING id;""".format("SHUTDOWN", 0, updated_at,
                                                         cluster_id, dead_server_ip)
                        hbase_id = execute_hbase_sql(sql)
                        # Zero out the dead regionserver's metrics row.
                        metrics_sql = """INSERT INTO hbase_metrics (max_heap, mem_store_size,
                                 online_region, read_request, no_store_files, no_stores,
                                 updated_at, used_heap, write_request, store_file_size, node_id)
                                 VALUES ('{0}', {1}, '{2}', {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10})
                                 RETURNING id;""".format(0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                                         updated_at, 0.0, 0.0, 0.0,
                                                         hbase_id)
                        execute_hbase_sql(metrics_sql)
                        dead_server_info.append(dead_server_ip)
                        i = i + 1
                        no_of_dead_servers = no_of_dead_servers - 1
            time.sleep(10)
    except Exception as e:
        log.error(e)

def update_hbase_master():
    try:
        updated_at = int(str(time.time()).split(".")[0])
        index = None
        count = 0
        # `hbase status` may not report a master immediately after a
        # start/stop, so retry up to five times before giving up.
        while index is None:
            result = subprocess.check_output([get_hbase_status],
                                             stderr=subprocess.STDOUT,
                                             shell=True).decode("utf-8")
            result = result.strip().split('\n')
            try:
                index = [i for i, s in enumerate(result)
                         if 'active master' in s][0]
            except Exception as e:
                log.error(e)
            if index is not None:
                break
            time.sleep(1)
            count = count + 1
            if count == 5:
                return {
                    "success": 0,
                    "msg": [
                        "Could not get master status, please check your log and restart again!!!"
                    ]
                }
        result = result[index:]
        active = (result[0]).strip().split(":", 1)[0]
        if active != "active master":
            # No active master: mark every master row in this cluster down.
            cluster_id = get_hbase_db_info(get_system_ip())[1]
            sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                     WHERE type={3} and cluster_id={4}
                     RETURNING id;""".format("SHUTDOWN", 0, updated_at, 1,
                                             cluster_id)
            execute_hbase_sql(sql)
        else:
            active_master = ((result[0]).strip().split(
                ":", 1)[1]).strip().split()[0]
            active_master_ip = get_ip(active_master.split(":")[0])
            active_id = get_hbase_db_info(active_master_ip)[0]
            cluster_id = get_hbase_db_info(active_master_ip)[1]
            sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                     WHERE id={3} RETURNING id;""".format(
                "RUNNING", 1, updated_at, active_id)
            execute_hbase_sql(sql)
            no_of_backup_master = int((result[1]).split()[0])
            if no_of_backup_master > 0:
                backup_master = (result[2]).strip().split()[0]
                backup_master_ip = get_ip(backup_master.split(":")[0])
                sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                         where type=1 and cluster_id={3} and ip='{4}'
                         RETURNING id;""".format("RUNNING", 0, updated_at,
                                                 cluster_id, backup_master_ip)
                execute_hbase_sql(sql)
            else:
                # No backup master: mark every other master row down.
                sql = """UPDATE hbase_hbase set status='{0}', state={1}, updated_at={2}
                         where type=1 and cluster_id={3} and ip <> '{4}'
                         RETURNING id;""".format("SHUTDOWN", 0, updated_at,
                                                 cluster_id, active_master_ip)
                execute_hbase_sql(sql)
    except Exception as e:
        log.error(e)
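
# For reference, get_hbase() and update_hbase_master() above index into the
# `hbase status` output line by line. A sketch of the layout they assume is
# below; the host names and numbers are made up for illustration and the real
# output depends on the HBase version and the status command's options.
#
#   active master: master-1.example:16000 1700000000000
#   1 backup masters
#       master-2.example:16000 1700000000001
#   2 live servers
#       rs-1.example:16020 1700000000002
#           requestsPerSecond=0.0, numberOfOnlineRegions=3, usedHeapMB=128, ...
#       rs-2.example:16020 1700000000003
#           requestsPerSecond=0.0, numberOfOnlineRegions=2, usedHeapMB=96, ...
#   1 dead servers
#       rs-3.example,16020,1699999999999
#
# Live servers alternate a host:port line with a metrics line (hence i += 2),
# while dead servers are single comma-separated lines (hence i += 1).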