Example #1
def enable_rmha(conf):

    ha_hosts_map = {}
    standby_rm_key = "StandbyResourceManager"

    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")


        host_id_map = util.host_id_map(config, api)

        #todo: check if enabled already
        yarn_svc = config.get_cluster_service_by_type(conf, 'YARN')
        yarn = cl.get_service(yarn_svc['name'])
        standby_rm = yarn_svc['haconfig']['standby_rm']
        ha_hosts_map[standby_rm_key] = standby_rm

        LOG.info("host_id_map: " + str(host_id_map))
        LOG.info("ha_host_map: " + str(ha_hosts_map))
        for ha_host in ha_hosts_map:
            LOG.info("ha_hosts_map[ha_host]: " + ha_hosts_map[ha_host])
            try:
                LOG.info("Host ID for " + ha_host + ": "+ host_id_map[ha_hosts_map[ha_host]])
            except KeyError:
                LOG.error("Cannot find host " + ha_hosts_map[ha_host] + " for role \""
                    + ha_host + "\" in list of hosts (" + str(list(host_id_map)) + ")")
                return

        zk_svc = config.get_cluster_service_by_type(conf, 'ZOOKEEPER')
        zk_svc_name = zk_svc['name']

        LOG.info("Running YARN HA configuration now...")
        # new_rm_host_id - ID of host where Standby Resource Manager will be created.
        # zk_service_name - Name of the ZooKeeper service to use for auto-failover.
        cmd = yarn.enable_rm_ha(
            host_id_map[standby_rm],
            zk_service_name=zk_svc_name
        )

        util.wait_for_command(cmd, True)
        #TODO: in some cases time.sleep(x) is needed to avoid race conditions
        try:
            mgmt_svc = cm.get_service()
            LOG.info("Restarting management services")
            mgmt.restart(cm)
        except ApiException:
            pass

        LOG.info("Restarting cluster")
        cluster.restart(cl)
    except ApiException as e:
        raise ProvisionatorException(e)
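
For orientation, here is a minimal, self-contained sketch of the underlying cm_api calls that enable_rmha() wraps. It assumes the standard cm_api Python client; the CM host, credentials, cluster and service names, and the standby host ID are placeholders, and the project's util.get_api_handle/util.host_id_map helpers are bypassed:

# Illustrative sketch only -- all hostnames, credentials, and IDs are placeholders.
from cm_api.api_client import ApiResource

api = ApiResource('cm-host.example.com', username='admin', password='admin')
cluster = api.get_cluster('cluster1')
yarn = cluster.get_service('yarn')

# Ask CM to add a standby ResourceManager on the given host and wire up
# automatic failover through the named ZooKeeper service.
cmd = yarn.enable_rm_ha('standby-rm-host-id', zk_service_name='zookeeper')
cmd = cmd.wait()  # block until the EnableRmHa command completes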
Example #2
def enable_kerberos(conf):
    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")

        # Check the HDFS service to see if Kerberos is already enabled
        hdfs_svc = config.get_cluster_service_by_type(conf, 'HDFS')
        hdfs = cl.get_service(hdfs_svc['name'])
        hdfs_cfg, hdfs_roletype_cfg = hdfs.get_config(view='full')

        if not hdfs_cfg['hadoop_security_authentication'].value == 'kerberos':
            # Kerberos has not been enabled - add the KDC creds
            if 'kdc_user' in conf['cm'] and 'kdc_pass' in conf['cm']:
                check_creds(conf['cm']['kdc_user'], conf['cm']['kdc_pass'])
                LOG.info("Importing credentials for administrative account")
                cmd = cm.import_admin_credentials(conf['cm']['kdc_user'],
                                                  conf['cm']['kdc_pass'])
                util.wait_for_command(cmd, True)

            # OK let's do this
            datanode_transceiver_port = DATANODE_TRANSCEIVER_PORT
            datanode_web_port = DATANODE_WEB_PORT

            cm_cfg = cm.get_config()
            if 'SINGLE_USER_ENABLED' in cm_cfg:
                # TODO: don't hardcode this
                datanode_transceiver_port = 4004
                datanode_web_port = 4006

            LOG.info("Running Kerberos configuration now...")
            cmd = cl.configure_for_kerberos(
                datanode_transceiver_port=datanode_transceiver_port,
                datanode_web_port=datanode_web_port)

            util.wait_for_command(cmd, True)
            #TODO: in some cases time.sleep(x) is needed to avoid race conditions
            try:
                mgmt_svc = cm.get_service()
                LOG.info("Restarting management services")
                mgmt.restart(cm)
            except ApiException:
                pass
            LOG.info("Restarting cluster")
            cluster.restart(cl)
        else:
            LOG.info("Kerberos already enabled")
    except ApiException as e:
        raise ProvisionatorException(e)
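
The same Kerberos flow, sketched directly against the cm_api client for reference. The realm, credentials, and ports are placeholders (1004/1006 are the usual privileged secure-DataNode ports, with 4004/4006 as the single-user-mode fallback shown in the code above):

# Illustrative sketch only -- hostnames, principals, and ports are placeholders.
from cm_api.api_client import ApiResource

api = ApiResource('cm-host.example.com', username='admin', password='admin')
cm = api.get_cloudera_manager()
cluster = api.get_cluster('cluster1')

# Hand the KDC account-manager credentials to CM so it can create service principals.
cm.import_admin_credentials('cloudera-scm/admin@EXAMPLE.COM', 'secret').wait()

# Reconfigure every service in the cluster for Kerberos. Secure DataNodes need
# privileged ports unless CM runs in single-user mode (hence the 4004/4006 branch).
cluster.configure_for_kerberos(datanode_transceiver_port=1004,
                               datanode_web_port=1006).wait()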
Example #3
def enable_hdfsha(conf):
    ha_hosts_map = {}
    active_namenode_key = "ActiveNamenode"
    standby_namenode_key = "StandbyNamenode"
    journalnode1_key = "JournalNode1"
    journalnode2_key = "JournalNode2"
    journalnode3_key = "JournalNode3"

    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")

        #todo: check if enabled already
        hdfs_svc = config.get_cluster_service_by_type(conf, 'HDFS')
        hdfs = cl.get_service(hdfs_svc['name'])
        hdfs_cfg, hdfs_roletype_cfg = hdfs.get_config(view='full')
        host_id_map = util.host_id_map(config, api)
        active_namenode = hdfs_svc['haconfig']['active_namenode']
        standby_namenode = hdfs_svc['haconfig']['standby_namenode']
        journalnode1 = hdfs_svc['haconfig']['journalnode1']
        journalnode2 = hdfs_svc['haconfig']['journalnode2']
        journalnode3 = hdfs_svc['haconfig']['journalnode3']
        ns = hdfs_svc['haconfig']['ns']
        jnEditsDir = hdfs_svc['haconfig']['jnEditsDir']

        ha_hosts_map[active_namenode_key] = active_namenode
        ha_hosts_map[standby_namenode_key] = standby_namenode
        ha_hosts_map[journalnode1_key] = journalnode1
        ha_hosts_map[journalnode2_key] = journalnode2
        ha_hosts_map[journalnode3_key] = journalnode3

        LOG.info("host_id_map: " + str(host_id_map))
        LOG.info("ha_host_map: " + str(ha_hosts_map))
        for ha_host in ha_hosts_map:
            LOG.info("ha_hosts_map[ha_host]: " + ha_hosts_map[ha_host])
            try:
                LOG.info("Host ID for " + ha_host + ": " + host_id_map[ha_hosts_map[ha_host]])
            except KeyError:
                LOG.error("Cannot find host " + ha_hosts_map[ha_host] + " for role \""
                    + ha_host + "\" in list of hosts (" + str(list(host_id_map)) + ")")
                return


        jns = []
        jns.append({'jnHostId': host_id_map[journalnode1], 'jnName': journalnode1_key, 'jnEditsDir': jnEditsDir})
        jns.append({'jnHostId': host_id_map[journalnode2], 'jnName': journalnode2_key, 'jnEditsDir': jnEditsDir})
        jns.append({'jnHostId': host_id_map[journalnode3], 'jnName': journalnode3_key, 'jnEditsDir': jnEditsDir})

        zk_svc = config.get_cluster_service_by_type(conf, 'ZOOKEEPER')
        zk_svc_name = zk_svc['name']

        nn = hdfs.get_roles_by_type("NAMENODE")

        if not nn:
            raise ProvisionatorException("Could not find the active Namenode for current configuration")



        if len([instance.name for instance in nn]) > 1:
            LOG.warn("Found more than one Namnode in current configuration")
        nn_name = ([instance.name for instance in nn])[0]
        LOG.info("Active Namenode name in current config: " + nn_name)

        LOG.info("Running HDFS HA configuration now...")
        # standby_host_id - ID of host where Standby NameNode will be created.
        # nameservice - Nameservice to be used while enabling HA
        # jns - List of Journal Nodes. Each element must be a dict containing the following keys:
        # jnHostId: ID of the host where the new JournalNode will be created.
        # jnName: Name of the JournalNode role (optional)
        # jnEditsDir: Edits dir of the JournalNode. Can be omitted if the config is already set at RCG level.
        # zk_service_name - Name of the ZooKeeper service to use for auto-failover.
        cmd = hdfs.enable_nn_ha(
            nn_name,
            host_id_map[standby_namenode],
            ns,
            jns,
            standby_name_dir_list=None,
            qj_name=None,
            standby_name=None,
            active_fc_name=None,
            standby_fc_name=None,
            zk_service_name=zk_svc_name,
            force_init_znode=True,
            clear_existing_standby_name_dirs=True,
            clear_existing_jn_edits_dir=True
        )

        util.wait_for_command(cmd, True)
        #TODO: in some cases time.sleep(x) is needed to avoid race conditions
        try:
            mgmt_svc = cm.get_service()
            LOG.info("Restarting management services")
            mgmt.restart(cm)
        except ApiException:
            pass
        LOG.info("Restarting cluster")
        cluster.restart(cl)
    except ApiException as e:
        raise ProvisionatorException(e)
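
For orientation, the HA section of the HDFS service entry that enable_hdfsha() reads might look like the following. The key names come directly from the lookups above; the hostnames, nameservice, and edits directory are placeholders, and where this block sits inside the overall conf structure is defined by the surrounding project:

# Illustrative only -- key names inferred from the haconfig lookups above.
hdfs_svc = {
    'name': 'hdfs',
    'type': 'HDFS',
    'haconfig': {
        'active_namenode':  'master-1.example.com',
        'standby_namenode': 'master-2.example.com',
        'journalnode1':     'master-1.example.com',
        'journalnode2':     'master-2.example.com',
        'journalnode3':     'worker-1.example.com',
        'ns':               'nameservice1',
        'jnEditsDir':       '/dfs/jn',
    },
}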
Example #4
def restart(cluster, exception_on_fail=True):
    cmd = cluster.restart(restart_only_stale_services=True,
                          redeploy_client_configuration=True)
    util.wait_for_command(cmd, exception_on_fail)
Example #5
def restart(cm, exception_on_fail=True):
    LOG.debug("Restarting management services")
    cmd = cm.get_service().restart()
    util.wait_for_command(cmd, exception_on_fail)
Example #6
def restart(svc_config, cluster, exception_on_fail=False):
    service = cluster.get_service(svc_config['name'])
    cmd = service.restart()
    return util.wait_for_command(cmd, exception_on_fail)
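
util.wait_for_command() belongs to the surrounding project and is not shown in these listings. A minimal stand-in built on cm_api's ApiCommand.wait() might look like this (illustrative, not the project's actual helper):

def wait_for_command(cmd, exception_on_fail=False):
    # Illustrative stand-in for util.wait_for_command.
    # ApiCommand.wait() polls CM until the command is no longer active and
    # returns the refreshed command object.
    cmd = cmd.wait()
    if not cmd.success:
        msg = "Command %s failed: %s" % (cmd.name, cmd.resultMessage)
        if exception_on_fail:
            raise ProvisionatorException(msg)
        LOG.error(msg)
    return cmd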
Example #7
                _conf = role['config']
                if to_interpolate:
                    _conf = _interpolate_roles(_conf, cluster, host_id_map)
                role_config.update_config(_conf)

    # Get updated service object
    service = cluster.get_service(svc_config['name'])
    LOG.debug("Service %s stale: %s, %s" % (service.name,
                                            service.configStale,
                                            service.clientConfigStalenessStatus))

    if new:
        # First run - this might fail for valid reasons for some services so add an additional start
        LOG.info("Performing first run actions for service %s" % svc_config['name'])
        cmd = service._cmd('firstRun', None, api_version=7)
        util.wait_for_command(cmd, True)
        if start_service:
            start(svc_config, cluster, True)
    elif start_service and service.configStale == 'true':
        LOG.info("Configuration stale for %s, restarting" % svc_config['name'])
        restart(svc_config, cluster, True)
    if service.clientConfigStalenessStatus == 'STALE':
        LOG.info("Client config stale for %s, redeploying" % svc_config['name'])
        cmd = service.deploy_client_config()
        util.wait_for_command(cmd, True)


def start(svc_config, cluster, exception_on_fail=False):
    service = cluster.get_service(svc_config['name'])
    cmd = service.start()
    return util.wait_for_command(cmd, exception_on_fail)
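
This last fragment starts midway through a larger service-setup routine, but the pattern it applies after (re)configuring a service is self-contained: run firstRun for a brand-new service, restart when the server-side configuration is stale, and redeploy client configurations when they are stale. Isolated, with a placeholder service name, the staleness checks look roughly like:

service = cluster.get_service('yarn')  # placeholder service name

if service.configStale == 'true':
    # Server-side configuration changed since the last restart.
    util.wait_for_command(service.restart(), True)

if service.clientConfigStalenessStatus == 'STALE':
    # Gateway/client configurations are out of date; push them out again.
    util.wait_for_command(service.deploy_client_config(), True)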