def get_nodemanagers_status(cluster):
    # Parse the 'yarn node -all -list' report into {hostname: status}.
    statuses = {}
    resourcemanager = u.get_resourcemanager(cluster)
    status_regexp = r'^(\S+):\d+\s+(\w+)'
    matcher = re.compile(status_regexp, re.MULTILINE)
    yarn_report = resourcemanager.remote().execute_command(
        'sudo su - -c "yarn node -all -list" hadoop')[1]

    for host, status in matcher.findall(yarn_report):
        statuses[host] = status.lower()

    return statuses
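
# Illustrative sketch (not part of the plugin): the status regexp above can be
# exercised offline against a sample "yarn node -all -list" report. The report
# text below is a hypothetical example of that command's output format.
import re

_SAMPLE_YARN_REPORT = (
    "Total Nodes:2\n"
    "       Node-Id    Node-State    Node-Http-Address    Containers\n"
    "worker-1:45454    RUNNING           worker-1:8042    0\n"
    "worker-2:45454    DECOMMISSIONED    worker-2:8042    0\n"
)


def _parse_node_report(report):
    # Same pattern as get_nodemanagers_status: capture the hostname (before
    # the port) and the node state that follows on each matching line.
    matcher = re.compile(r'^(\S+):\d+\s+(\w+)', re.MULTILINE)
    return {host: status.lower() for host, status in matcher.findall(report)}

# _parse_node_report(_SAMPLE_YARN_REPORT)
# -> {'worker-1': 'running', 'worker-2': 'decommissioned'}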
def scale_cluster(pctx, cluster, instances):
    config.configure_instances(pctx, instances)
    _update_include_files(cluster)
    run.refresh_hadoop_nodes(cluster)
    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.refresh_yarn_nodes(cluster)

    config.configure_topology_data(pctx, cluster)
    run.start_dn_nm_processes(instances)
    swift_helper.install_ssl_certs(instances)
    config.configure_zookeeper(cluster)
    run.refresh_zk_servers(cluster)
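
# Sketch under assumptions (the helper name and its use are illustrative, not
# the plugin's actual _update_include_files): the dfs.hosts and
# yarn.resourcemanager.nodes.include-path files referenced by the generated
# configs are plain newline-separated hostname lists, so refreshing cluster
# membership amounts to regenerating that text from the current instances.
def _build_include_file_content(hostnames):
    # One hostname per line with a trailing newline, as Hadoop expects for
    # dfs.hosts-style include/exclude files.
    return '\n'.join(sorted(hostnames)) + '\n'

# _build_include_file_content(['worker-2', 'worker-1'])
# -> 'worker-1\nworker-2\n'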
def validate_additional_ng_scaling(cluster, additional):
    rm = vu.get_resourcemanager(cluster)
    scalable_processes = _get_scalable_processes()

    for ng_id in additional:
        ng = u.get_by_id(cluster.node_groups, ng_id)
        if not set(ng.node_processes).issubset(scalable_processes):
            msg = _("Vanilla plugin cannot scale nodegroup with processes: "
                    "%s")
            raise ex.NodeGroupCannotBeScaled(
                ng.name, msg % ' '.join(ng.node_processes))
        if not rm and 'nodemanager' in ng.node_processes:
            msg = _("Vanilla plugin cannot scale node group with processes "
                    "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(ng.name, msg)
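
# Minimal standalone illustration of the subset check above. The contents of
# the scalable set are an assumption here (the plugin gets them from
# _get_scalable_processes, which is not shown in this section); worker roles
# are used purely as an example.
_SCALABLE_EXAMPLE = {'datanode', 'nodemanager'}

assert set(['datanode']).issubset(_SCALABLE_EXAMPLE)           # scalable
assert not set(['namenode']).issubset(_SCALABLE_EXAMPLE)       # rejected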
def _set_cluster_info(self, cluster):
    nn = vu.get_namenode(cluster)
    rm = vu.get_resourcemanager(cluster)
    hs = vu.get_historyserver(cluster)
    oo = vu.get_oozie(cluster)
    sp = vu.get_spark_history_server(cluster)
    info = {}

    if rm:
        info['YARN'] = {
            'Web UI': 'http://%s:%s' % (rm.get_ip_or_dns_name(), '8088'),
            'ResourceManager': 'http://%s:%s' % (
                rm.get_ip_or_dns_name(), '8032')
        }

    if nn:
        info['HDFS'] = {
            'Web UI': 'http://%s:%s' % (nn.get_ip_or_dns_name(), '50070'),
            'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
        }

    if oo:
        info['JobFlow'] = {
            'Oozie': 'http://%s:%s' % (oo.get_ip_or_dns_name(), '11000')
        }

    if hs:
        info['MapReduce JobHistory Server'] = {
            'Web UI': 'http://%s:%s' % (hs.get_ip_or_dns_name(), '19888')
        }

    if sp:
        info['Apache Spark'] = {
            'Spark UI': 'http://%s:%s' % (sp.management_ip, '4040'),
            'Spark History Server UI': 'http://%s:%s' % (
                sp.management_ip, '18080')
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
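
# For a hypothetical cluster whose master instance resolves to 'master-1',
# the info dict persisted by cluster_update would look like this (the ports
# are the ones hard-coded above: 8088/8032 for YARN, 50070/9000 for HDFS):
_EXAMPLE_INFO = {
    'YARN': {
        'Web UI': 'http://master-1:8088',
        'ResourceManager': 'http://master-1:8032',
    },
    'HDFS': {
        'Web UI': 'http://master-1:50070',
        'NameNode': 'hdfs://master-1:9000',
    },
}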
def decommission_nodes(pctx, cluster, instances):
    datanodes = _get_instances_with_service(instances, 'datanode')
    nodemanagers = _get_instances_with_service(instances, 'nodemanager')
    _update_exclude_files(cluster, instances)

    run.refresh_hadoop_nodes(cluster)
    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.refresh_yarn_nodes(cluster)

    _check_nodemanagers_decommission(cluster, nodemanagers)
    _check_datanodes_decommission(cluster, datanodes)

    _update_include_files(cluster, instances)
    _clear_exclude_files(cluster)
    run.refresh_hadoop_nodes(cluster)

    config.configure_topology_data(pctx, cluster)
    config.configure_zookeeper(cluster, instances)
    # TODO(shuyingya): should invent a way to lastly restart the leader node
    run.refresh_zk_servers(cluster, instances)
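
# Hedged sketch of the waiting step (the real _check_nodemanagers_decommission
# helper is not shown in this section): poll get_nodemanagers_status until
# every decommissioned instance reports 'decommissioned', giving up after a
# timeout. The timeout, interval, and exception type here are illustrative.
import time


def _wait_for_decommission(cluster, instances, timeout=300, interval=5):
    hostnames = {i.hostname() for i in instances}
    deadline = time.time() + timeout
    while time.time() < deadline:
        statuses = get_nodemanagers_status(cluster)
        if all(statuses.get(h) == 'decommissioned' for h in hostnames):
            return
        time.sleep(interval)
    raise RuntimeError('NodeManagers failed to decommission in time: %s'
                       % ', '.join(sorted(hostnames)))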
def start_resourcemanager(cluster):
    rm = vu.get_resourcemanager(cluster)
    if rm:
        _start_resourcemanager(rm)
def refresh_yarn_nodes(cluster):
    rm = vu.get_resourcemanager(cluster)
    rm.remote().execute_command(
        'sudo su - -c "yarn rmadmin -refreshNodes" hadoop')
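
# The 'sudo su - -c "<cmd>" hadoop' wrapper recurs in this module (see
# get_nodemanagers_status above). A hypothetical helper like this one would
# factor the quoting out in a single place; it is a sketch, not part of the
# plugin:
def _execute_as_hadoop(remote, cmd):
    # Run cmd in a login shell as the 'hadoop' user on the remote instance.
    return remote.execute_command('sudo su - -c "%s" hadoop' % cmd)

# refresh_yarn_nodes could then be written as:
#     _execute_as_hadoop(rm.remote(), 'yarn rmadmin -refreshNodes')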
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs(cluster))

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_pass = u.get_hive_password(cluster)

        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
                'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                    'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                    'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': hive_pass,
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: key_manager.get_secret(
                    proxy_configs['proxy_password']),
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
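
# Sketch under assumptions: sahara has its own config serializers, so this
# stand-in only illustrates the shape of the data _get_hadoop_configs returns.
# Each service maps to a flat {property-name: value} dict, which Hadoop
# consumes as <property> entries in its *-site.xml files:
def _to_hadoop_xml(props):
    lines = ['<configuration>']
    for name, value in sorted(props.items()):
        lines.append('  <property>')
        lines.append('    <name>%s</name>' % name)
        lines.append('    <value>%s</value>' % value)
        lines.append('  </property>')
    lines.append('</configuration>')
    return '\n'.join(lines)

# _to_hadoop_xml({'fs.defaultFS': 'hdfs://master-1:9000'}) yields the
# core-site.xml fragment corresponding to the 'Hadoop' section of confs.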