Пример #1
0
def _start_cloudera_manager(cluster):
    """Start Cloudera Manager and wait until its API port accepts connections.

    The Cloudera database and the manager service are started through the
    manager node's remote. Afterwards ``CM_API_PORT`` on the manager's
    management IP is probed repeatedly, calling ``context.sleep(2)`` after
    every failed attempt, until a connection succeeds or 300 seconds have
    elapsed.

    :param cluster: cluster whose manager node should be started
    :raises HadoopProvisionError: if no connection could be opened before
        the timeout expired
    """
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    # Hand the mapping to the logger instead of pre-formatting the string,
    # so interpolation only happens when debug output is actually emitted.
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : ",
              {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            # A successful connect is the readiness signal; nothing is sent.
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            # Port not reachable yet: pause before the next probe.
            context.sleep(2)
    else:
        # Only reached when the loop condition became false, i.e. the
        # timeout elapsed without hitting ``break``.
        # Floor division keeps the value an integer, so the message reads
        # "5 minutes" rather than "5.0 minutes" under true division.
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout // 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name
                    }
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
Пример #2
0
def _start_cloudera_manager(cluster):
    """Start Cloudera Manager and wait until its API port accepts connections.

    The Cloudera database and the manager service are started through the
    manager node's remote. Afterwards ``CM_API_PORT`` on the manager's
    management IP is probed repeatedly, calling ``context.sleep(2)`` after
    every failed attempt, until a connection succeeds or 300 seconds have
    elapsed.

    :param cluster: cluster whose manager node should be started
    :raises HadoopProvisionError: if no connection could be opened before
        the timeout expired
    """
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    # Let the logger interpolate lazily rather than formatting up front.
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : ",
              {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            # Being able to connect is the readiness signal.
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            # Port not reachable yet: pause before the next probe.
            context.sleep(2)
    else:
        # Runs only when the loop ended without ``break`` (timeout elapsed).
        # Floor division avoids a float ("5.0 minutes") under true division.
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout // 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
Пример #3
0
def _configure_hive(cluster):
    """Create the Hive database and the /tmp/hive-hive directory in HDFS.

    The database is created through the manager node's remote; the HDFS
    commands are executed through the namenode's remote.

    :param cluster: cluster being configured
    """
    with pu.get_manager(cluster).remote() as manager_remote:
        db_helper.create_hive_database(cluster, manager_remote)

    # Hive requires /tmp/hive-hive directory
    hdfs_commands = (
        'sudo su - -c "hadoop fs -mkdir -p /tmp/hive-hive" hdfs',
        'sudo su - -c "hadoop fs -chown hive /tmp/hive-hive" hdfs',
    )
    with pu.get_namenode(cluster).remote() as namenode_remote:
        for command in hdfs_commands:
            namenode_remote.execute_command(command)
Пример #4
0
    def _set_cluster_info(self, cluster):
        """Record the Cloudera Manager access details in the cluster's info.

        :param cluster: cluster whose ``info`` field is updated through the
            conductor
        """
        manager_ip = cu.get_manager(cluster).management_ip
        manager_details = {
            'Web UI': 'http://%s:7180' % manager_ip,
            'Username': '******',
            'Password': '******'
        }
        cluster_info = {'Cloudera Manager': manager_details}

        conductor.cluster_update(
            context.ctx(), cluster, {'info': cluster_info})
Пример #5
0
    def _set_cluster_info(self, cluster):
        """Publish the Cloudera Manager entry into the cluster's ``info``.

        :param cluster: cluster to update via ``conductor.cluster_update``
        """
        web_ui = 'http://%s:7180' % cu.get_manager(cluster).management_ip
        update = {
            'info': {
                'Cloudera Manager': {
                    'Web UI': web_ui,
                    'Username': '******',
                    'Password': '******'
                }
            }
        }

        conductor.cluster_update(context.ctx(), cluster, update)
Пример #6
0
def create_mgmt_service(cluster):
    """Create the Cloudera management service and start roles on its host.

    One role is registered on the manager's FQDN for each of the monitoring
    role types listed below.

    :param cluster: cluster whose manager node hosts the roles
    """
    cloudera_manager = get_api_client(cluster).get_cloudera_manager()

    setup = services.ApiServiceSetupInfo()
    manager = pu.get_manager(cluster)
    host = manager.fqdn()
    role_types = ('SERVICEMONITOR', 'HOSTMONITOR',
                  'EVENTSERVER', 'ALERTPUBLISHER')
    for role_type in role_types:
        role_name = get_role_name(manager, role_type)
        setup.add_role_info(role_name, role_type, host)

    cloudera_manager.create_mgmt_service(setup)
    cloudera_manager.hosts_start_roles([host])
Пример #7
0
def configure_cluster(cluster):
    """Run the provisioning steps for a newly created cluster, in order.

    OS configuration and package installation are skipped when
    ``cmd.is_pre_installed_cdh`` reports true for the manager node.

    :param cluster: cluster to configure
    """
    instances = gu.get_instances(cluster)

    manager_remote = pu.get_manager(cluster).remote()
    needs_install = not cmd.is_pre_installed_cdh(manager_remote)
    if needs_install:
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    # Agents are started before the manager; the agents are awaited only
    # once the manager start has returned.
    _start_cloudera_agents(instances)
    _start_cloudera_manager(cluster)
    _await_agents(instances)

    _configure_manager(cluster)
    _create_services(cluster)
    _configure_services(cluster)
    _configure_instances(instances)
    cu.deploy_configs(cluster)
Пример #8
0
def create_mgmt_service(cluster):
    """Register the management service roles on the manager and start them.

    :param cluster: cluster whose manager node hosts the management roles
    """
    cm_handle = get_api_client(cluster).get_cloudera_manager()

    service_setup = services.ApiServiceSetupInfo()
    manager = pu.get_manager(cluster)
    manager_fqdn = manager.fqdn()
    for process in ('SERVICEMONITOR', 'HOSTMONITOR', 'EVENTSERVER',
                    'ALERTPUBLISHER'):
        service_setup.add_role_info(
            get_role_name(manager, process), process, manager_fqdn)

    cm_handle.create_mgmt_service(service_setup)
    cm_handle.hosts_start_roles([manager_fqdn])
Пример #9
0
def configure_cluster(cluster):
    """Configure every instance and service of ``cluster`` step by step.

    Installation steps run only when ``cmd.is_pre_installed_cdh`` returns a
    falsy value for the manager node's remote.

    :param cluster: cluster to configure
    """
    instances = gu.get_instances(cluster)

    remote = pu.get_manager(cluster).remote()
    if cmd.is_pre_installed_cdh(remote):
        pass  # nothing to install
    else:
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    _start_cloudera_agents(instances)
    _start_cloudera_manager(cluster)
    _await_agents(instances)
    _configure_manager(cluster)
    _create_services(cluster)
    _configure_services(cluster)
    _configure_instances(instances)
    cu.deploy_configs(cluster)
Пример #10
0
def scale_cluster(cluster, instances):
    """Bring newly added ``instances`` into an existing cluster.

    :param cluster: cluster being scaled
    :param instances: the instances that were added
    """
    def runs(inst, process):
        # Re-read on every call, exactly where each check happens.
        return process in inst.node_group.node_processes

    manager_remote = pu.get_manager(cluster).remote()
    if not cmd.is_pre_installed_cdh(manager_remote):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)
        _post_install(instances)

    _start_cloudera_agents(instances)
    for inst in instances:
        _configure_instance(inst)
        cu.update_configs(inst)

        if runs(inst, 'DATANODE'):
            cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)

        _configure_swift_to_inst(inst)

        # Roles are started only after the Swift step above has run.
        if runs(inst, 'DATANODE'):
            hdfs_service = cu.get_service('DATANODE', instance=inst)
            datanode_role = cu.get_role_name(inst, 'DATANODE')
            cu.start_roles(hdfs_service, datanode_role)

        if runs(inst, 'NODEMANAGER'):
            yarn_service = cu.get_service('NODEMANAGER', instance=inst)
            nodemanager_role = cu.get_role_name(inst, 'NODEMANAGER')
            cu.start_roles(yarn_service, nodemanager_role)
Пример #11
0
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {}
    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
        core_site_safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            configs = swift_helper.get_swift_configs()
            confs = dict((c['name'], c['value']) for c in configs)
            core_site_safety_valve = xmlutils.create_elements_xml(confs)
        all_confs = {
            'HDFS': {
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                'dfs_block_local_path_access_user':
                    '******' if imp_count else '',
                'core_site_safety_valve': core_site_safety_valve
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': cu.SENTRY_SERVICE_NAME if snt_count else '',
                'zookeeper_service':
                    cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': 'true' if ks_count else 'false',
                'hbase_enable_replication': 'true' if ks_count else 'false'
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else ''
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else '',
                'hive_service': cu.HIVE_SERVICE_NAME
            }
        }
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                db_helper.get_hive_db_password(cluster)
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs': cu.get_role_name(pu.get_namenode(cluster),
                                                'NAMENODE')
            }
        }
        sentry_confs = {
            'SENTRY': {
                'sentry_server_database_type': 'postgresql',
                'sentry_server_database_host':
                pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port': '7432',
                'sentry_server_database_password':
                db_helper.get_sentry_db_password(cluster)
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, sentry_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    if node_group:
        paths = node_group.storage_paths()

        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            },
            'SERVER': {
                'maxSessionTimeout': 60000
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    return all_confs.get(service, {})
Пример #12
0
 def test_get_manager(self):
     """get_manager returns the instance with id 'id1' for the fake cluster."""
     manager = u.get_manager(tu.get_fake_cluster())
     self.assertEqual('id1', manager.instance_id)
Пример #13
0
def _configure_sentry(cluster):
    """Create the Sentry database through the manager node's remote.

    :param cluster: cluster being configured
    """
    with pu.get_manager(cluster).remote() as manager_remote:
        db_helper.create_sentry_database(cluster, manager_remote)
Пример #14
0
def get_api_client(cluster):
    """Build an ``ApiResource`` pointed at the cluster's manager node.

    :param cluster: cluster whose manager management IP is used as the host
    :returns: the ``api_client.ApiResource`` created with the module's
        default username, password and API version
    """
    host = pu.get_manager(cluster).management_ip
    options = {
        'username': CM_DEFAULT_USERNAME,
        'password': CM_DEFAULT_PASSWD,
        'version': CM_API_VERSION,
    }
    return api_client.ApiResource(host, **options)
Пример #15
0
def _start_cloudera_agent(instance):
    """Configure the agent on ``instance`` with the manager hostname, then start it.

    :param instance: instance on which the agent runs
    """
    owning_cluster = instance.node_group.cluster
    manager_hostname = pu.get_manager(owning_cluster).hostname()
    with instance.remote() as agent_remote:
        cmd.configure_agent(agent_remote, manager_hostname)
        cmd.start_agent(agent_remote)
Пример #16
0
def _get_configs(service, cluster=None, node_group=None):
    """Look up the configuration values that apply to ``service``.

    Starts from a fixed set of service cross-references, then layers on the
    node-group values (when ``node_group`` is truthy) and the cluster values
    (when ``cluster`` is truthy), and returns the entry stored under
    ``service``.

    :param service: key to look up in the assembled mapping
    :param cluster: optional cluster contributing the Hue/Hive settings and
        ``cluster.cluster_configs``
    :param node_group: optional node group contributing ``node_configs`` and
        the storage-path based role defaults
    :returns: the dict stored under ``service``, or ``{}`` when absent
    """
    def _join_dirs(mount_points, suffix):
        # Comma-separated list of every mount point with ``suffix`` appended.
        return ','.join(point + suffix for point in mount_points)

    merged = {
        'OOZIE': {'mapreduce_yarn_service': cu.YARN_SERVICE_NAME},
        'YARN': {'hdfs_service': cu.HDFS_SERVICE_NAME},
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME
        },
        'SPARK_ON_YARN': {'yarn_service': cu.YARN_SERVICE_NAME}
    }

    if node_group:
        storage = node_group.storage_paths()

        role_defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': _join_dirs(storage, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _join_dirs(storage, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': _join_dirs(storage, '/fs/dn')
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': _join_dirs(storage,
                                                          '/yarn/local')
            }
        }

        # The user-supplied values are merged first, the defaults second.
        merged = _merge_dicts(merged, node_group.node_configs)
        merged = _merge_dicts(merged, role_defaults)

    if cluster:
        metastore_host = pu.get_manager(cluster).internal_ip
        metastore_password = db_helper.get_hive_db_password(cluster)
        hive_overlay = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host': metastore_host,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password': metastore_password,
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            }
        }

        namenode_role = cu.get_role_name(pu.get_namenode(cluster),
                                         'NAMENODE')
        hue_overlay = {'HUE': {'hue_webhdfs': namenode_role}}

        # Merge order is significant: cluster_configs is applied last.
        merged = _merge_dicts(merged, hue_overlay)
        merged = _merge_dicts(merged, hive_overlay)
        merged = _merge_dicts(merged, cluster.cluster_configs)

    return merged.get(service, {})
Пример #17
0
def _configure_sentry(cluster):
    """Set up the Sentry database using a remote to the manager node.

    :param cluster: cluster being configured
    """
    manager_node = pu.get_manager(cluster)
    with manager_node.remote() as remote:
        db_helper.create_sentry_database(cluster, remote)
Пример #18
0
def _start_cloudera_agent(instance):
    """Point the agent on ``instance`` at the manager host and start it.

    :param instance: instance on which the agent runs
    """
    manager = pu.get_manager(instance.node_group.cluster)
    manager_host = manager.hostname()
    with instance.remote() as remote:
        cmd.configure_agent(remote, manager_host)
        cmd.start_agent(remote)
Пример #19
0
def get_api_client(cluster):
    """Build an ``ApiResource`` pointed at the cluster's manager node.

    :param cluster: cluster whose manager management IP is used as the host
    :returns: the ``api_client.ApiResource`` created with the module's
        default username and password
    """
    host = pu.get_manager(cluster).management_ip
    credentials = {
        'username': CM_DEFAULT_USERNAME,
        'password': CM_DEFAULT_PASSWD,
    }
    return api_client.ApiResource(host, **credentials)
Пример #20
0
def _get_configs(service, cluster=None, node_group=None):
    def get_hadoop_dirs(mount_points, suffix):
        return ','.join([x + suffix for x in mount_points])

    all_confs = {}
    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
        core_site_safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            configs = swift_helper.get_swift_configs()
            confs = dict((c['name'], c['value']) for c in configs)
            core_site_safety_valve = xmlutils.create_elements_xml(confs)
        all_confs = {
            'HDFS': {
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                'dfs_block_local_path_access_user':
                '******' if imp_count else '',
                'core_site_safety_valve': core_site_safety_valve
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': cu.SENTRY_SERVICE_NAME if snt_count else '',
                'zookeeper_service':
                cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': 'true' if ks_count else 'false',
                'hbase_enable_replication': 'true' if ks_count else 'false'
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else ''
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': cu.HBASE_SERVICE_NAME if hbm_count else '',
                'hive_service': cu.HIVE_SERVICE_NAME
            }
        }
        hive_confs = {
            'HIVE': {
                'hive_metastore_database_type':
                'postgresql',
                'hive_metastore_database_host':
                pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port':
                '7432',
                'hive_metastore_database_password':
                db_helper.get_hive_db_password(cluster)
            }
        }
        hue_confs = {
            'HUE': {
                'hue_webhdfs':
                cu.get_role_name(pu.get_namenode(cluster), 'NAMENODE')
            }
        }
        sentry_confs = {
            'SENTRY': {
                'sentry_server_database_type':
                'postgresql',
                'sentry_server_database_host':
                pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port':
                '7432',
                'sentry_server_database_password':
                db_helper.get_sentry_db_password(cluster)
            }
        }

        all_confs = _merge_dicts(all_confs, hue_confs)
        all_confs = _merge_dicts(all_confs, hive_confs)
        all_confs = _merge_dicts(all_confs, sentry_confs)
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    if node_group:
        paths = node_group.storage_paths()

        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                get_hadoop_dirs(paths, '/yarn/local')
            },
            'SERVER': {
                'maxSessionTimeout': 60000
            }
        }

        ng_user_confs = pu.convert_process_configs(node_group.node_configs)
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    return all_confs.get(service, {})
Пример #21
0
def _get_configs(service, cluster=None, node_group=None):
    """Return the configuration dict registered for ``service``.

    A fixed table of service cross-references is extended with node-group
    values (when ``node_group`` is truthy) and then with cluster values
    (when ``cluster`` is truthy) before the lookup is performed.

    :param service: service or role-type name used as the lookup key
    :param cluster: optional cluster supplying the Hue/Hive settings and
        ``cluster.cluster_configs``
    :param node_group: optional node group supplying converted
        ``node_configs`` and storage-path based role defaults
    :returns: the dict stored under ``service``, or ``{}`` when absent
    """
    def _dirs_under(mount_points, suffix):
        # Append ``suffix`` to each mount point and join with commas.
        return ','.join(base + suffix for base in mount_points)

    table = {
        'OOZIE': {'mapreduce_yarn_service': cu.YARN_SERVICE_NAME},
        'YARN': {'hdfs_service': cu.HDFS_SERVICE_NAME},
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME
        },
        'SPARK_ON_YARN': {'yarn_service': cu.YARN_SERVICE_NAME},
        'HBASE': {
            'hdfs_service': cu.HDFS_SERVICE_NAME,
            'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
        }
    }

    if node_group:
        mounts = node_group.storage_paths()

        defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': _dirs_under(mounts, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _dirs_under(mounts, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': _dirs_under(mounts, '/fs/dn')
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    _dirs_under(mounts, '/yarn/local')
            }
        }

        user_values = pu.convert_process_configs(node_group.node_configs)
        # User values are merged before the defaults.
        table = _merge_dicts(table, user_values)
        table = _merge_dicts(table, defaults)

    if cluster:
        metastore_host = pu.get_manager(cluster).internal_ip
        metastore_password = db_helper.get_hive_db_password(cluster)
        hive_settings = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host': metastore_host,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password': metastore_password,
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
            }
        }

        webhdfs_role = cu.get_role_name(pu.get_namenode(cluster), 'NAMENODE')
        hue_settings = {'HUE': {'hue_webhdfs': webhdfs_role}}

        # Applied in this order; cluster_configs goes last.
        table = _merge_dicts(table, hue_settings)
        table = _merge_dicts(table, hive_settings)
        table = _merge_dicts(table, cluster.cluster_configs)

    return table.get(service, {})