示例#1
0
    def start_cluster(self, cluster):
        """Start the Hadoop services of ``cluster`` in dependency order.

        The NameNode is formatted and started first, followed by the
        ResourceManager and the DataNodes. NodeManagers are started only
        after ``run.await_datanodes`` returns. The history server and Oozie
        are started only when the cluster has such an instance. Finally the
        cluster info is refreshed via ``self._set_cluster_info``.

        :param cluster: cluster whose services should be started
        """
        # HDFS master: format, then start.
        namenode = utils.get_namenode(cluster)
        run.format_namenode(namenode)
        run.start_hadoop_process(namenode, 'namenode')

        # YARN master.
        resourcemanager = utils.get_resourcemanager(cluster)
        run.start_yarn_process(resourcemanager, 'resourcemanager')

        # HDFS workers; NodeManagers are held back until await_datanodes
        # has returned.
        for datanode in utils.get_datanodes(cluster):
            run.start_hadoop_process(datanode, 'datanode')
        run.await_datanodes(cluster)

        # YARN workers.
        for nodemanager in utils.get_nodemanagers(cluster):
            run.start_yarn_process(nodemanager, 'nodemanager')

        # Optional services: skipped when the lookup returns nothing.
        historyserver = utils.get_historyserver(cluster)
        if historyserver:
            run.start_historyserver(historyserver)

        oozie = utils.get_oozie(cluster)
        if oozie:
            run.start_oozie_process(oozie)

        self._set_cluster_info(cluster)
示例#2
0
def decommission_nodes(cluster, instances):
    """Decommission the given instances and delete them from the manager.

    Steps, in order:

    1. Instances that are DataNodes are decommissioned in HDFS and polled
       until they report the decommissioned status.
    2. All given nodes are stopped through the manager client, and the
       DataNode / NodeManager services on them are polled until stopped.
    3. Every given node is deleted through the manager client.

    Each wait phase has a single time budget shared by all of its hosts.
    Every host is polled at least once, even if earlier hosts already used
    up the budget, so a host that is already in the desired state is never
    reported as failed. Timeouts are logged as warnings and do not abort
    the procedure.

    :param cluster: cluster the instances belong to
    :param instances: instances to remove from the cluster
    """
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [nm.fqdn() for nm in u.get_nodemanagers(cluster)]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    dec_dn_hosts = [host for host in dec_hosts if host in dn_hosts]

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        cur_time = 0  # elapsed seconds, shared by all hosts of this phase
        for host in dec_dn_hosts:
            while True:
                # NOTE: the status literal is compared exactly as before
                # (including its spelling); presumably it matches what the
                # manager reports - do not "correct" it without checking.
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                # Deadline is tested after the status check so that every
                # host gets at least one poll, and the last sleep is always
                # followed by a check.
                if cur_time >= timeout:
                    LOG.warn("Failed to decomission node '%s' of cluster "
                             "'%s' in %s minutes"
                             % (host, cluster.name, timeout // 60))
                    break
                context.sleep(5)
                cur_time += 5

    client.nodes.stop(dec_hosts)

    # wait stop services
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    cur_time = 0  # elapsed seconds, shared by all instances of this phase
    for instance in instances:
        fqdn = instance.fqdn()

        # Only the services this instance actually runs need to be checked.
        services = []
        if fqdn in dn_hosts:
            services.append('hadoop-hdfs-datanode')
        if fqdn in nm_hosts:
            services.append('hadoop-yarn-nodemanager')

        while True:
            # all() short-circuits, so the NodeManager is not queried while
            # the DataNode is still running (same as the original and-chain).
            if all(_is_hadoop_service_stopped(instance, service)
                   for service in services):
                break
            if cur_time >= timeout:
                LOG.warn("Failed to stop services on node '%s' of cluster "
                         "'%s' in %s minutes"
                         % (instance, cluster.name, timeout // 60))
                break
            context.sleep(5)
            cur_time += 5

    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)
示例#3
0
def _configure_services(client, cluster):
    """Add the cluster's services to the manager and assign host roles.

    Which services are added depends on which instances the cluster has:
    ``hdfs`` for a NameNode, ``yarn`` for a ResourceManager, ``oozie`` and
    ``pig`` for an Oozie instance, ``hive`` for a Hive server. After the
    services are added, the corresponding roles are assigned to hosts by
    FQDN.

    Each instance lookup is performed once and reused, instead of calling
    the same ``u.get_*`` helper repeatedly.

    :param client: manager client used to add services and assign roles
    :param cluster: cluster whose topology is being registered
    """
    namenode = u.get_namenode(cluster)
    # As before, a NameNode is required: this fails if there is none.
    nn_host = namenode.fqdn()

    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None

    resourcemanager = u.get_resourcemanager(cluster)
    rm_host = resourcemanager.fqdn() if resourcemanager else None

    historyserver = u.get_historyserver(cluster)
    hs_host = historyserver.fqdn() if historyserver else None

    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [nm.fqdn() for nm in u.get_nodemanagers(cluster)]

    oozie = u.get_oozie(cluster)
    oozie_host = oozie.fqdn() if oozie else None

    hiveserver = u.get_hiveserver(cluster)
    hive_host = hiveserver.fqdn() if hiveserver else None

    services = []
    if namenode:
        services.append('hdfs')

    if resourcemanager:
        services.append('yarn')

    if oozie_host:
        # Pig is added together with Oozie.
        services.extend(['oozie', 'pig'])

    if hive_host:
        services.append('hive')

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if rm_host:
        client.services.yarn.add_nodes('ResourceManager', [rm_host])
        client.services.yarn.add_nodes('NodeManager', nm_hosts)

    if hs_host:
        client.services.yarn.add_nodes('HistoryServer', [hs_host])
示例#4
0
def _update_include_files(cluster):
    """Rewrite the dn-include and nm-include files on every instance.

    On each instance of ``cluster`` the ``dn-include`` file under
    ``HADOOP_CONF_DIR`` is overwritten with the DataNode host names and
    the ``nm-include`` file with the NodeManager host names, as produced by
    ``u.generate_fqdn_host_names``. The commands run through
    ``sudo su - -c ... hadoop``.

    :param cluster: cluster whose include files should be refreshed
    """
    all_instances = u.get_instances(cluster)

    dn_instances = u.get_datanodes(cluster)
    nm_instances = u.get_nodemanagers(cluster)
    datanode_names = u.generate_fqdn_host_names(dn_instances)
    nodemanager_names = u.generate_fqdn_host_names(nm_instances)

    # (include file name, content) pairs, written in this order.
    include_files = (
        ('dn-include', datanode_names),
        ('nm-include', nodemanager_names),
    )

    for inst in all_instances:
        with inst.remote() as remote:
            for file_name, host_names in include_files:
                command = 'sudo su - -c "echo \'%s\' > %s/%s" hadoop' % (
                    host_names, HADOOP_CONF_DIR, file_name)
                remote.execute_command(command)
示例#5
0
def scale_cluster(cluster, instances):
    """Add new instances to a running cluster through the manager.

    The new hosts are registered with the manager and software is installed
    on them. Hosts that are NodeManagers / DataNodes get the corresponding
    role, cluster configs are re-provisioned and pushed to the nodes, and
    the HDFS / YARN services are started if any host received a role in
    them.

    :param cluster: cluster being scaled
    :param instances: newly added instances
    """
    new_hosts = [inst.fqdn() for inst in instances]
    datanode_fqdns = [node.fqdn() for node in u.get_datanodes(cluster)]
    nodemanager_fqdns = [node.fqdn() for node in u.get_nodemanagers(cluster)]

    # New hosts split by the role they carry (a host may carry both).
    new_datanodes = [host for host in new_hosts if host in datanode_fqdns]
    new_nodemanagers = [
        host for host in new_hosts if host in nodemanager_fqdns]

    manager = u.get_instance(cluster, 'manager')
    client = c.IntelClient(manager, cluster.name)

    client.nodes.add(
        new_hosts, '/Default', 'hadoop', '/home/hadoop/.ssh/id_rsa')
    client.cluster.install_software(new_hosts)

    if new_nodemanagers:
        client.services.yarn.add_nodes('NodeManager', new_nodemanagers)

    if new_datanodes:
        client.services.hdfs.add_nodes('DataNode', new_datanodes)

    # IDH 3.0.2 reset cluster parameters (bug #1300603)
    # Restoring them back
    LOG.info("Provisioning configs")
    _configure_storage(client, cluster)  # cinder and ephemeral drive support
    _configure_swift(client, cluster)  # swift support
    _add_user_params(client, cluster)  # user configs

    client.nodes.config()

    if new_datanodes:
        client.services.hdfs.start()

    if new_nodemanagers:
        client.services.yarn.start()