Example #1
def _configure_services(client, cluster):
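    # Resolve the FQDN of each role instance up front; optional roles
    # resolve to None when the cluster does not run them.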
    nn_host = u.get_namenode(cluster).fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    rm = u.get_resourcemanager(cluster)
    rm_host = rm.fqdn() if rm else None
    hs = u.get_historyserver(cluster)
    hs_host = hs.fqdn() if hs else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [nm.fqdn() for nm in u.get_nodemanagers(cluster)]

    oozie = u.get_oozie(cluster)
    oozie_host = oozie.fqdn() if oozie else None
    hive = u.get_hiveserver(cluster)
    hive_host = hive.fqdn() if hive else None

    services = []
    if u.get_namenode(cluster):
        services += ['hdfs']

    if rm:
        services += ['yarn']

    if oozie_host:
        services += ['oozie']
        services += ['pig']

    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if rm_host:
        client.services.yarn.add_nodes('ResourceManager', [rm_host])
        client.services.yarn.add_nodes('NodeManager', nm_hosts)

    if hs_host:
        client.services.yarn.add_nodes('HistoryServer', [hs_host])
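
A minimal usage sketch: the client argument is presumably the IntelClient that Example #9 constructs, so wiring the two together would look roughly like this (hypothetical, not part of the original source):

client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)
_configure_services(client, cluster)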
Example #2
    def _set_cluster_info(self, cluster):
        nn = utils.get_namenode(cluster)
        rm = utils.get_resourcemanager(cluster)
        hs = utils.get_historyserver(cluster)
        oo = utils.get_oozie(cluster)

        info = {}

        if rm:
            info['YARN'] = {
                'Web UI': 'http://%s:%s' % (rm.management_ip, '8088'),
                'ResourceManager': 'http://%s:%s' % (rm.management_ip, '8032')
            }

        if nn:
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, '50070'),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
            }

        if oo:
            info['JobFlow'] = {
                'Oozie': 'http://%s:%s' % (oo.management_ip, '11000')
            }

        if hs:
            info['MapReduce JobHistory Server'] = {
                'Web UI': 'http://%s:%s' % (hs.management_ip, '19888')
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
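
For a cluster running all four services, the stored 'info' mapping would look roughly like this (addresses and hostname are made up for illustration):

info = {
    'YARN': {'Web UI': 'http://192.0.2.10:8088',
             'ResourceManager': 'http://192.0.2.10:8032'},
    'HDFS': {'Web UI': 'http://192.0.2.11:50070',
             'NameNode': 'hdfs://master-001:9000'},
    'JobFlow': {'Oozie': 'http://192.0.2.12:11000'},
    'MapReduce JobHistory Server': {'Web UI': 'http://192.0.2.13:19888'}
}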
Example #3
    def start_cluster(self, cluster):
        nn = utils.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        rm = utils.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, 'resourcemanager')

        for dn in utils.get_datanodes(cluster):
            run.start_hadoop_process(dn, 'datanode')

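        # Wait for the datanodes to register with the namenode before
        # bringing up the YARN daemons.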
        run.await_datanodes(cluster)

        for nm in utils.get_nodemanagers(cluster):
            run.start_yarn_process(nm, 'nodemanager')

        hs = utils.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = utils.get_oozie(cluster)
        if oo:
            run.start_oozie_process(oo)

        self._set_cluster_info(cluster)
Example #4
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_resourcemanager(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {
            'IDH Manager': {
                'Web UI': 'https://%s:9443' % mng.management_ip
            }
        }

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['Yarn'] = {
                'ResourceManager Web UI': 'http://%s:8088' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['Yarn']['ResourceManager'] = '%s:8032' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {'Web UI': 'http://%s:50070' % nn.management_ip}
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #5
def _get_hadoop_configs(node_group):
    cluster = node_group.cluster
    nn_hostname = utils.get_namenode(cluster).hostname()
    res_hostname = utils.get_resourcemanager(cluster).hostname()
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        },
        'YARN': {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': res_hostname,
            'yarn.resourcemanager.nodes.include-path':
                '%s/nm-include' % HADOOP_CONF_DIR,
            'yarn.resourcemanager.nodes.exclude-path':
                '%s/nm-exclude' % HADOOP_CONF_DIR
        },
        'MapReduce': {
            'mapreduce.framework.name': 'yarn'
        },
    }

    oozie = utils.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.get_config_value(c_helper.ENABLE_SWIFT.applicable_target,
                                 c_helper.ENABLE_SWIFT.name, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    return confs, c_helper.get_env_configs()
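
The top-level keys group properties by the Hadoop XML file they usually end up in; a plausible mapping on the writer side (an assumption, since that code is not shown here) would be:

CONFIG_FILE_BY_SECTION = {
    'Hadoop': 'core-site.xml',      # fs.defaultFS, proxyuser, swift, topology
    'HDFS': 'hdfs-site.xml',        # dfs.* properties
    'YARN': 'yarn-site.xml',        # yarn.* properties
    'MapReduce': 'mapred-site.xml'  # mapreduce.framework.name
}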
Example #6
def get_nodemanagers_status(cluster):
    statuses = {}
    resourcemanager = u.get_resourcemanager(cluster)
    status_regexp = r'^(\S+):\d+\s+(\w+)'
    matcher = re.compile(status_regexp, re.MULTILINE)
    yarn_report = resourcemanager.remote().execute_command(
        'sudo su - -c "yarn node -all -list" hadoop')[1]

    for host, status in matcher.findall(yarn_report):
        statuses[host] = status.lower()

    return statuses
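
A runnable sketch of what the regex extracts from a "yarn node -all -list" report (the sample output below is illustrative):

import re

sample_report = (
    "Total Nodes:2\n"
    "node-001.example.com:45454          RUNNING  node-001.example.com:8042  0\n"
    "node-002.example.com:45454   DECOMMISSIONED  node-002.example.com:8042  0\n"
)
matcher = re.compile(r'^(\S+):\d+\s+(\w+)', re.MULTILINE)
print({host: state.lower() for host, state in matcher.findall(sample_report)})
# {'node-001.example.com': 'running', 'node-002.example.com': 'decommissioned'}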
Example #7
def validate_additional_ng_scaling(cluster, additional):
    rm = u.get_resourcemanager(cluster)
    scalable_processes = _get_scalable_processes()

    for ng_id in additional:
        ng = gu.get_by_id(cluster.node_groups, ng_id)
        if not set(ng.node_processes).issubset(scalable_processes):
            msg = "Vanilla plugin cannot scale nodegroup with processes: %s"
            raise ex.NodeGroupCannotBeScaled(ng.name,
                                             msg % ' '.join(ng.node_processes))

        if not rm and 'nodemanager' in ng.node_processes:
            msg = ("Vanilla plugin cannot scale node group with processes "
                   "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(ng.name, msg)
Example #8
    def _validate_additional_ng_scaling(self, cluster, additional):
        rm = u.get_resourcemanager(cluster)
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = self._get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale node group"
                    " with processes: " + ' '.join(ng.node_processes))
            if not rm and 'nodemanager' in ng.node_processes:
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale node group with "
                    "processes whose master processes are not running "
                    "in the cluster")
Example #9
def start_cluster(cluster):
    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    LOG.debug("Starting hadoop services")
    client.services.hdfs.start()

    if u.get_resourcemanager(cluster):
        client.services.yarn.start()

    if u.get_hiveserver(cluster):
        client.services.hive.start()

    if u.get_oozie(cluster):
        LOG.info("Setup oozie")
        _setup_oozie(cluster)

        client.services.oozie.start()
Example #10
def refresh_yarn_nodes(cluster):
    rm = u.get_resourcemanager(cluster)
    rm.remote().execute_command(
        'sudo su - -c "yarn rmadmin -refreshNodes" hadoop')
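
In the scaling flow, this refresh is presumably issued after rewriting the nm-exclude file that yarn.resourcemanager.nodes.exclude-path points at (see Example #5). A hypothetical decommission helper, with the file path and the write_file_to call as assumptions:

def decommission_nodemanagers(cluster, instances):
    # Hypothetical: list the hosts to retire in the YARN exclude file,
    # then ask the ResourceManager to re-read it.
    hosts = '\n'.join(i.hostname() for i in instances)
    rm = u.get_resourcemanager(cluster)
    with rm.remote() as r:
        r.write_file_to('%s/nm-exclude' % HADOOP_CONF_DIR, hosts,
                        run_as_root=True)
    refresh_yarn_nodes(cluster)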