Example #1
def get_nodemanagers_status(cluster):
    statuses = {}
    resourcemanager = u.get_resourcemanager(cluster)
    # Each data row of the "yarn node -all -list" report looks like
    # "<host>:<port>   <STATE> ..."; capture the host and the state column.
    status_regexp = r'^(\S+):\d+\s+(\w+)'
    matcher = re.compile(status_regexp, re.MULTILINE)
    yarn_report = resourcemanager.remote().execute_command(
        'sudo su - -c "yarn node -all -list" hadoop')[1]

    for host, status in matcher.findall(yarn_report):
        statuses[host] = status.lower()

    return statuses
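
The map returned above (hostname to lower-cased YARN node state) is what a decommissioning check such as _check_nodemanagers_decommission in Example #4 has to poll. A minimal polling sketch follows; the helper name, the timeout values, and the assumption that instance.hostname() matches the host field of the YARN report are illustrative only, not the plugin's actual implementation.

import time

def _wait_for_nm_decommission(cluster, instances, timeout=300, interval=5):
    # Hedged sketch: poll get_nodemanagers_status() until every given
    # instance reports the "decommissioned" state or the timeout expires.
    hostnames = {i.hostname() for i in instances}
    deadline = time.time() + timeout
    while time.time() < deadline:
        statuses = get_nodemanagers_status(cluster)
        if all(statuses.get(host) == 'decommissioned' for host in hostnames):
            return
        time.sleep(interval)
    raise RuntimeError('NodeManagers were not decommissioned in time')
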
def scale_cluster(pctx, cluster, instances):
    # Configure the new instances first, then make the running cluster aware
    # of them (include files plus HDFS/YARN node refresh) before starting
    # the DataNode/NodeManager processes on them.
    config.configure_instances(pctx, instances)
    _update_include_files(cluster)
    run.refresh_hadoop_nodes(cluster)
    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.refresh_yarn_nodes(cluster)

    config.configure_topology_data(pctx, cluster)
    run.start_dn_nm_processes(instances)
    swift_helper.install_ssl_certs(instances)
    config.configure_zookeeper(cluster)
    run.refresh_zk_servers(cluster)

def validate_additional_ng_scaling(cluster, additional):
    rm = vu.get_resourcemanager(cluster)
    scalable_processes = _get_scalable_processes()

    for ng_id in additional:
        ng = u.get_by_id(cluster.node_groups, ng_id)
        if not set(ng.node_processes).issubset(scalable_processes):
            msg = _("Vanilla plugin cannot scale nodegroup with processes: %s")
            raise ex.NodeGroupCannotBeScaled(ng.name,
                                             msg % ' '.join(ng.node_processes))

        if not rm and 'nodemanager' in ng.node_processes:
            msg = _("Vanilla plugin cannot scale node group with processes "
                    "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(ng.name, msg)
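
_get_scalable_processes() itself is not part of this listing. Judging from the checks above, which only reason about the datanode and nodemanager worker processes, a plausible sketch is the following; the concrete list is an assumption, not a verified copy of the plugin's helper.

def _get_scalable_processes():
    # Hedged sketch: worker-side processes that may be added to a running
    # cluster, inferred from the datanode/nodemanager handling above.
    return ['datanode', 'nodemanager']
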
Example #4
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        rm = vu.get_resourcemanager(cluster)
        hs = vu.get_historyserver(cluster)
        oo = vu.get_oozie(cluster)
        sp = vu.get_spark_history_server(cluster)
        info = {}

        if rm:
            info['YARN'] = {
                'Web UI':
                'http://%s:%s' % (rm.get_ip_or_dns_name(), '8088'),
                'ResourceManager':
                'http://%s:%s' % (rm.get_ip_or_dns_name(), '8032')
            }

        if nn:
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.get_ip_or_dns_name(), '50070'),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
            }

        if oo:
            info['JobFlow'] = {
                'Oozie': 'http://%s:%s' % (oo.get_ip_or_dns_name(), '11000')
            }

        if hs:
            info['MapReduce JobHistory Server'] = {
                'Web UI': 'http://%s:%s' % (hs.get_ip_or_dns_name(), '19888')
            }

        if sp:
            info['Apache Spark'] = {
                'Spark UI':
                'http://%s:%s' % (sp.management_ip, '4040'),
                'Spark History Server UI':
                'http://%s:%s' % (sp.management_ip, '18080')
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

def decommission_nodes(pctx, cluster, instances):
    datanodes = _get_instances_with_service(instances, 'datanode')
    nodemanagers = _get_instances_with_service(instances, 'nodemanager')
    _update_exclude_files(cluster, instances)

    run.refresh_hadoop_nodes(cluster)
    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.refresh_yarn_nodes(cluster)

    _check_nodemanagers_decommission(cluster, nodemanagers)
    _check_datanodes_decommission(cluster, datanodes)

    _update_include_files(cluster, instances)
    _clear_exclude_files(cluster)
    run.refresh_hadoop_nodes(cluster)

    config.configure_topology_data(pctx, cluster)
    config.configure_zookeeper(cluster, instances)
    # TODO(shuyingya):should invent a way to lastly restart the leader node
    run.refresh_zk_servers(cluster, instances)
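
_get_instances_with_service(), used at the top of decommission_nodes(), is also not shown here. Assuming each instance exposes its node group's process list the same way the validation code in Example #1 reads ng.node_processes, a minimal sketch would be:

def _get_instances_with_service(instances, service):
    # Hedged sketch: keep the instances whose node group runs the given
    # process; relies on instance.node_group.node_processes as used
    # elsewhere in this listing.
    return [instance for instance in instances
            if service in instance.node_group.node_processes]
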
def refresh_yarn_nodes(cluster):
    rm = vu.get_resourcemanager(cluster)
    rm.remote().execute_command(
        'sudo su - -c "yarn rmadmin -refreshNodes" hadoop')
def _get_hadoop_configs(pctx, instance):
    # Build the per-service configuration map (core Hadoop, HDFS, YARN,
    # MapReduce, plus optional Oozie, Swift, topology and Hive settings)
    # for the cluster this instance belongs to.
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services':
            'mapreduce_shuffle',
            'yarn.resourcemanager.hostname':
            '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path':
            '%s/nm-include' % (HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path':
            '%s/nm-exclude' % (HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {'mapreduce.framework.name': 'yarn'}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = ("%s:10020" %
                                                                  hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = oozie_helper.get_oozie_required_xml_configs(
            HADOOP_CONF_DIR)
        if config_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(oozie_helper.get_oozie_mysql_configs(cluster))

        confs['JobFlow'] = oozie_cfg

    if config_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if config_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_pass = u.get_hive_password(cluster)

        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms':
            True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        # A MySQL-backed metastore overrides the local Derby default above.
        if config_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName':
                '******',
                'javax.jdo.option.ConnectionPassword':
                hive_pass,
                'datanucleus.autoCreateSchema':
                'false',
                'datanucleus.fixedDatastore':
                'true',
                'hive.metastore.uris':
                'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and config_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME:
                proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD:
                key_manager.get_secret(proxy_configs['proxy_password']),
                swift.HADOOP_SWIFT_TRUST_ID:
                proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME:
                CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
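
Each top-level key of the returned confs map ('Hadoop', 'HDFS', 'YARN', 'MapReduce', ...) is a group of properties that ends up in a Hadoop-style XML configuration file. The plugin's own serializer is not shown in this listing; purely as an illustration, a property dict maps onto <property> elements like this:

def to_hadoop_xml(props):
    # Illustration only (not the plugin's real serializer): render a
    # {name: value} dict as Hadoop-style configuration XML.
    lines = ['<configuration>']
    for name, value in sorted(props.items()):
        lines.append('  <property>')
        lines.append('    <name>%s</name>' % name)
        lines.append('    <value>%s</value>' % value)
        lines.append('  </property>')
    lines.append('</configuration>')
    return '\n'.join(lines)
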
Example #8
def start_resourcemanager(cluster):
    rm = vu.get_resourcemanager(cluster)
    if rm:
        _start_resourcemanager(rm)
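
_start_resourcemanager() is not included in this listing. A heavily hedged sketch, assuming the daemon is started through the stock yarn-daemon.sh control script with the same remote-execution pattern used elsewhere in these snippets:

def _start_resourcemanager(instance):
    # Hedged sketch, not the plugin's actual helper; assumes yarn-daemon.sh
    # is available to the hadoop user on the ResourceManager instance.
    instance.remote().execute_command(
        'sudo su - -c "yarn-daemon.sh start resourcemanager" hadoop')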