Exemplo n.º 1
0
def scale_cluster(cluster, instances):
    """Prepare newly added instances and start their roles.

    Does nothing when ``instances`` is empty. Otherwise the first instance
    is probed with ``cmd.is_pre_installed_cdh``; when that returns a false
    value the OS is configured and ``PACKAGES`` are installed on every
    instance. The Cloudera agents are then started and awaited, and each
    instance is configured in turn.

    :param cluster: cluster being scaled; passed to ``cu.refresh_nodes``
    :param instances: sequence of instances being added
    """
    if not instances:
        return

    # Only the first instance is probed; its answer decides for the batch.
    first_remote = instances[0].remote()
    if not cmd.is_pre_installed_cdh(first_remote):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    _start_cloudera_agents(instances)
    _await_agents(instances)

    for instance in instances:
        _configure_instance(instance)
        cu.update_configs(instance)

        # Refresh happens before the Swift step and before any role start.
        if 'DATANODE' in instance.node_group.node_processes:
            cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)

        _configure_swift_to_inst(instance)

        # DATANODE is handled before NODEMANAGER, as two separate starts.
        for process in ('DATANODE', 'NODEMANAGER'):
            if process not in instance.node_group.node_processes:
                continue
            service = cu.get_service(process, instance=instance)
            role_name = cu.get_role_name(instance, process)
            cu.start_roles(service, role_name)
Exemplo n.º 2
0
def scale_cluster(cluster, instances):
    """Set up the given new instances and start their roles.

    Returns immediately for an empty ``instances``. When
    ``cmd.is_pre_installed_cdh`` reports a false value for the first
    instance's remote, OS configuration and installation of ``PACKAGES``
    run for all instances. After the Cloudera agents are started and
    awaited, every instance is configured, its configs are updated, and its
    DATANODE / NODEMANAGER roles (if listed in its node group) are started.

    :param cluster: cluster being scaled; passed to ``cu.refresh_nodes``
    :param instances: sequence of instances being added
    """
    def _start_role_if_present(inst, process):
        # Start the role for ``process`` only if the node group lists it.
        if process in inst.node_group.node_processes:
            service = cu.get_service(process, instance=inst)
            cu.start_roles(service, cu.get_role_name(inst, process))

    if not instances:
        return

    cdh_preinstalled = cmd.is_pre_installed_cdh(instances[0].remote())
    if not cdh_preinstalled:
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    _start_cloudera_agents(instances)
    _await_agents(instances)

    for inst in instances:
        _configure_instance(inst)
        cu.update_configs(inst)

        if 'DATANODE' in inst.node_group.node_processes:
            cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)

        _configure_swift_to_inst(inst)

        _start_role_if_present(inst, 'DATANODE')
        _start_role_if_present(inst, 'NODEMANAGER')
Exemplo n.º 3
0
def _add_role(instance, process):
    """Create the role for ``process`` on ``instance`` and configure it.

    Nothing is created when ``process`` is 'MANAGER'. Otherwise the service
    is looked up with ``cu.get_service``, a role is created on it for the
    instance's FQDN, and the role receives the node-group configs returned
    by ``_get_configs``.

    :param instance: instance that will host the role
    :param process: process name used as the role type
    """
    if process == "MANAGER":
        return

    service = cu.get_service(process, instance=instance)
    role_name = cu.get_role_name(instance, process)
    host = instance.fqdn()
    role = service.create_role(role_name, process, host)

    role_configs = _get_configs(process, node_group=instance.node_group)
    role.update_config(role_configs)
Exemplo n.º 4
0
def _add_role(instance, process):
    """Create and configure a role of type ``process`` on ``instance``.

    The 'MANAGER' process is skipped. For any other process the owning
    service is fetched via ``cu.get_service``, a role named by
    ``cu.get_role_name`` is created for ``instance.fqdn()``, and its config
    is updated with ``_get_configs`` for the instance's node group.

    :param instance: instance that will host the role
    :param process: process name used as the role type
    """
    skipped = ('MANAGER',)
    if process in skipped:
        return

    owner = cu.get_service(process, instance=instance)
    new_role = owner.create_role(
        cu.get_role_name(instance, process),
        process,
        instance.fqdn(),
    )
    new_role.update_config(
        _get_configs(process, node_group=instance.node_group))
Exemplo n.º 5
0
def _add_role(instance, process):
    """Create and configure the role for ``process`` on ``instance``.

    'CLOUDERA_MANAGER' is skipped. Any other name is first passed through
    ``pu.convert_role_showname``; the converted name is then used for the
    service lookup, the role name, the role type and the config lookup.

    :param instance: instance that will host the role
    :param process: process name as given by the caller
    """
    if process == 'CLOUDERA_MANAGER':
        return

    role_type = pu.convert_role_showname(process)
    service = cu.get_service(role_type, instance=instance)

    role_name = cu.get_role_name(instance, role_type)
    role = service.create_role(role_name, role_type, instance.fqdn())

    role_configs = _get_configs(role_type, node_group=instance.node_group)
    role.update_config(role_configs)
Exemplo n.º 6
0
def decommission_cluster(cluster, instances):
    """Decommission the roles of ``instances`` and delete the instances.

    Role names are collected for every instance whose node group lists
    DATANODE and/or NODEMANAGER. Each non-empty list is passed to
    ``cu.decommission_nodes``. The instances are then deleted and both the
    DATANODE and NODEMANAGER node lists are refreshed, whether or not
    anything was decommissioned.

    :param cluster: cluster the instances belong to
    :param instances: instances being removed
    """
    datanode_roles = []
    nodemanager_roles = []
    buckets = (
        ('DATANODE', datanode_roles),
        ('NODEMANAGER', nodemanager_roles),
    )

    for instance in instances:
        processes = instance.node_group.node_processes
        for process, role_names in buckets:
            if process in processes:
                role_names.append(cu.get_role_name(instance, process))

    # Empty buckets are skipped so no decommission call is made for them.
    for process, role_names in buckets:
        if role_names:
            cu.decommission_nodes(cluster, process, role_names)

    cu.delete_instances(cluster, instances)

    cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)
    cu.refresh_nodes(cluster, 'NODEMANAGER', cu.YARN_SERVICE_NAME)
Exemplo n.º 7
0
def decommission_cluster(cluster, instances):
    """Remove ``instances`` from the cluster, decommissioning roles first.

    For each instance, a role name is recorded per DATANODE / NODEMANAGER
    process listed in its node group. Non-empty role lists are handed to
    ``cu.decommission_nodes`` (DATANODE first), after which the instances
    are deleted and the node lists of both process types are refreshed.

    :param cluster: cluster the instances belong to
    :param instances: instances being removed
    """
    order = ('DATANODE', 'NODEMANAGER')
    roles_by_process = {process: [] for process in order}

    for inst in instances:
        for process in order:
            if process in inst.node_group.node_processes:
                roles_by_process[process].append(
                    cu.get_role_name(inst, process))

    for process in order:
        role_names = roles_by_process[process]
        if role_names:
            cu.decommission_nodes(cluster, process, role_names)

    cu.delete_instances(cluster, instances)

    # Both refreshes always run, independent of what was decommissioned.
    cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)
    cu.refresh_nodes(cluster, 'NODEMANAGER', cu.YARN_SERVICE_NAME)
Exemplo n.º 8
0
def _get_configs(service, cluster=None, node_group=None):
    """Return the configuration dict stored for ``service``.

    Cluster-level settings are assembled only when ``cluster`` is truthy
    and node-group-level settings only when ``node_group`` is truthy. The
    layers are combined with ``_merge_dicts`` in this order: built-in
    service wiring, HUE, HIVE and SENTRY additions,
    ``cluster.cluster_configs``, the converted ``node_group.node_configs``
    and finally the node group defaults derived from its storage paths.

    :param service: key to look up in the combined mapping
    :param cluster: cluster object, or None to skip cluster-level settings
    :param node_group: node group object, or None to skip its settings
    :returns: the entry found under ``service``, or an empty dict
    """
    def _dirs_under(roots, tail):
        # Comma-separated string with ``tail`` appended to every root.
        return ','.join(root + tail for root in roots)

    merged = {}

    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')

        # Stays empty unless Swift is enabled for the cluster.
        safety_valve = ''
        if c_helper.is_swift_enabled(cluster):
            swift_items = swift_helper.get_swift_configs()
            swift_map = {
                item['name']: item['value'] for item in swift_items
            }
            safety_valve = xmlutils.create_elements_xml(swift_map)

        hdfs = cu.HDFS_SERVICE_NAME
        yarn = cu.YARN_SERVICE_NAME
        zookeeper = cu.ZOOKEEPER_SERVICE_NAME

        # Optional dependencies collapse to '' when the matching instance
        # count is zero.
        zk_if_present = zookeeper if zk_count else ''
        hbase_if_present = cu.HBASE_SERVICE_NAME if hbm_count else ''
        sentry_if_present = cu.SENTRY_SERVICE_NAME if snt_count else ''
        indexer_flag = 'true' if ks_count else 'false'

        merged = {
            'HDFS': {
                'zookeeper_service': zk_if_present,
                # NOTE(review): this literal looks like a redacted
                # placeholder - confirm the intended value.
                'dfs_block_local_path_access_user':
                    '******' if imp_count else '',
                'core_site_safety_valve': safety_valve,
            },
            'HIVE': {
                'mapreduce_yarn_service': yarn,
                'zookeeper_service': zk_if_present,
            },
            'OOZIE': {
                'mapreduce_yarn_service': yarn,
                'zookeeper_service': zk_if_present,
            },
            'YARN': {
                'hdfs_service': hdfs,
                'zookeeper_service': zk_if_present,
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': sentry_if_present,
                'zookeeper_service': zk_if_present,
            },
            'SPARK_ON_YARN': {
                'yarn_service': yarn,
            },
            'HBASE': {
                'hdfs_service': hdfs,
                'zookeeper_service': zookeeper,
                'hbase_enable_indexing': indexer_flag,
                'hbase_enable_replication': indexer_flag,
            },
            'FLUME': {
                'hdfs_service': hdfs,
                'hbase_service': hbase_if_present,
            },
            'SENTRY': {
                'hdfs_service': hdfs,
            },
            'SOLR': {
                'hdfs_service': hdfs,
                'zookeeper_service': zookeeper,
            },
            'SQOOP': {
                'mapreduce_yarn_service': yarn,
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME,
            },
            'IMPALA': {
                'hdfs_service': hdfs,
                'hbase_service': hbase_if_present,
                'hive_service': cu.HIVE_SERVICE_NAME,
            },
        }

        hive_extra = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
            },
        }
        namenode_role = cu.get_role_name(pu.get_namenode(cluster),
                                         'NAMENODE')
        hue_extra = {'HUE': {'hue_webhdfs': namenode_role}}
        sentry_extra = {
            'SENTRY': {
                'sentry_server_database_type': 'postgresql',
                'sentry_server_database_host':
                    pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port': '7432',
                'sentry_server_database_password':
                    db_helper.get_sentry_db_password(cluster),
            },
        }

        merged = _merge_dicts(merged, hue_extra)
        merged = _merge_dicts(merged, hive_extra)
        merged = _merge_dicts(merged, sentry_extra)
        merged = _merge_dicts(merged, cluster.cluster_configs)

    if node_group:
        storage = node_group.storage_paths()

        ng_defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': _dirs_under(storage, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _dirs_under(storage, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': _dirs_under(storage, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30,
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    _dirs_under(storage, '/yarn/local'),
            },
            'SERVER': {
                'maxSessionTimeout': 60000,
            },
        }

        # User-supplied node group configs are merged before the defaults.
        ng_user = pu.convert_process_configs(node_group.node_configs)
        merged = _merge_dicts(merged, ng_user)
        merged = _merge_dicts(merged, ng_defaults)

    return merged.get(service, {})
Exemplo n.º 9
0
    def test_get_role_name(self):
        """Check the role name built for host 'spam-host', process 'eggs'."""
        instance = mock.Mock()
        instance.hostname.return_value = 'spam-host'

        role_name = cu.get_role_name(instance, 'eggs')

        self.assertEqual('eggs_spam_host', role_name)
Exemplo n.º 10
0
def _get_configs(service, cluster=None, node_group=None):
    """Return the configuration dict stored for ``service``.

    A fixed set of service cross-references is always present. When
    ``node_group`` is truthy its ``node_configs`` and then the defaults
    derived from its storage paths are merged in; when ``cluster`` is
    truthy the HUE and HIVE additions and ``cluster.cluster_configs`` are
    merged afterwards. All merging goes through ``_merge_dicts``.

    :param service: key to look up in the combined mapping
    :param cluster: cluster object, or None to skip cluster-level settings
    :param node_group: node group object, or None to skip its settings
    :returns: the entry found under ``service``, or an empty dict
    """
    def _dirs_under(roots, tail):
        # Comma-separated string with ``tail`` appended to every root.
        return ','.join(root + tail for root in roots)

    yarn = cu.YARN_SERVICE_NAME
    merged = {
        'OOZIE': {'mapreduce_yarn_service': yarn},
        'YARN': {'hdfs_service': cu.HDFS_SERVICE_NAME},
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME,
        },
        'SPARK_ON_YARN': {'yarn_service': yarn},
    }

    if node_group:
        storage = node_group.storage_paths()

        ng_defaults = {
            'NAMENODE': {
                'dfs_name_dir_list': _dirs_under(storage, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _dirs_under(storage, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': _dirs_under(storage, '/fs/dn'),
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    _dirs_under(storage, '/yarn/local'),
            },
        }

        # User-supplied node group configs are merged before the defaults.
        merged = _merge_dicts(merged, node_group.node_configs)
        merged = _merge_dicts(merged, ng_defaults)

    if cluster:
        hive_extra = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
            },
        }
        namenode_role = cu.get_role_name(pu.get_namenode(cluster),
                                         'NAMENODE')
        hue_extra = {'HUE': {'hue_webhdfs': namenode_role}}

        merged = _merge_dicts(merged, hue_extra)
        merged = _merge_dicts(merged, hive_extra)
        merged = _merge_dicts(merged, cluster.cluster_configs)

    return merged.get(service, {})
Exemplo n.º 11
0
def _get_configs(service, cluster=None, node_group=None):
    """Look up the combined configuration for one service or process.

    With a truthy ``cluster``, the built-in service wiring is created and
    then merged, in order, with the HUE, HIVE and SENTRY additions and with
    ``cluster.cluster_configs``. With a truthy ``node_group``, its
    converted ``node_configs`` and then the defaults derived from its
    storage paths are merged on top. All merging goes through
    ``_merge_dicts``.

    :param service: key to look up in the combined mapping
    :param cluster: cluster object, or None to skip cluster-level settings
    :param node_group: node group object, or None to skip its settings
    :returns: the entry found under ``service``, or an empty dict
    """
    def _suffix_all(mounts, suffix):
        # Join every mount point, extended by ``suffix``, with commas.
        return ','.join(mount + suffix for mount in mounts)

    combined = {}

    if cluster:
        zk_count = v._get_inst_count(cluster, 'ZOOKEEPER_SERVER')
        hbm_count = v._get_inst_count(cluster, 'HBASE_MASTER')
        snt_count = v._get_inst_count(cluster, 'SENTRY_SERVER')
        ks_count = v._get_inst_count(cluster, 'KEY_VALUE_STORE_INDEXER')
        imp_count = v._get_inst_count(cluster, 'IMPALA_CATALOGSERVER')

        core_site_xml = ''
        if c_helper.is_swift_enabled(cluster):
            swift_pairs = {}
            for entry in swift_helper.get_swift_configs():
                swift_pairs[entry['name']] = entry['value']
            core_site_xml = xmlutils.create_elements_xml(swift_pairs)

        # Values that depend on whether the related process is deployed.
        optional_zk = cu.ZOOKEEPER_SERVICE_NAME if zk_count else ''
        optional_hbase = cu.HBASE_SERVICE_NAME if hbm_count else ''
        optional_sentry = cu.SENTRY_SERVICE_NAME if snt_count else ''
        ks_enabled = 'true' if ks_count else 'false'
        # NOTE(review): the literal below looks like a redacted
        # placeholder - confirm the intended value.
        local_path_user = '******' if imp_count else ''

        combined = {
            'HDFS': {
                'zookeeper_service': optional_zk,
                'dfs_block_local_path_access_user': local_path_user,
                'core_site_safety_valve': core_site_xml,
            },
            'HIVE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service': optional_zk,
            },
            'OOZIE': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
                'zookeeper_service': optional_zk,
            },
            'YARN': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': optional_zk,
            },
            'HUE': {
                'hive_service': cu.HIVE_SERVICE_NAME,
                'oozie_service': cu.OOZIE_SERVICE_NAME,
                'sentry_service': optional_sentry,
                'zookeeper_service': optional_zk,
            },
            'SPARK_ON_YARN': {
                'yarn_service': cu.YARN_SERVICE_NAME,
            },
            'HBASE': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
                'hbase_enable_indexing': ks_enabled,
                'hbase_enable_replication': ks_enabled,
            },
            'FLUME': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': optional_hbase,
            },
            'SENTRY': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
            },
            'SOLR': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
            },
            'SQOOP': {
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
            },
            'KS_INDEXER': {
                'hbase_service': cu.HBASE_SERVICE_NAME,
                'solr_service': cu.SOLR_SERVICE_NAME,
            },
            'IMPALA': {
                'hdfs_service': cu.HDFS_SERVICE_NAME,
                'hbase_service': optional_hbase,
                'hive_service': cu.HIVE_SERVICE_NAME,
            },
        }

        hive_layer = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
            },
        }
        hue_layer = {
            'HUE': {
                'hue_webhdfs':
                    cu.get_role_name(pu.get_namenode(cluster), 'NAMENODE'),
            },
        }
        sentry_layer = {
            'SENTRY': {
                'sentry_server_database_type': 'postgresql',
                'sentry_server_database_host':
                    pu.get_manager(cluster).internal_ip,
                'sentry_server_database_port': '7432',
                'sentry_server_database_password':
                    db_helper.get_sentry_db_password(cluster),
            },
        }

        # Merge order matters: HUE, HIVE, SENTRY, then cluster configs.
        cluster_layers = (hue_layer, hive_layer, sentry_layer,
                          cluster.cluster_configs)
        for layer in cluster_layers:
            combined = _merge_dicts(combined, layer)

    if node_group:
        mounts = node_group.storage_paths()

        defaults_layer = {
            'NAMENODE': {
                'dfs_name_dir_list': _suffix_all(mounts, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _suffix_all(mounts, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': _suffix_all(mounts, '/fs/dn'),
                'dfs_datanode_data_dir_perm': 755,
                'dfs_datanode_handler_count': 30,
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    _suffix_all(mounts, '/yarn/local'),
            },
            'SERVER': {
                'maxSessionTimeout': 60000,
            },
        }

        user_layer = pu.convert_process_configs(node_group.node_configs)
        combined = _merge_dicts(combined, user_layer)
        combined = _merge_dicts(combined, defaults_layer)

    return combined.get(service, {})
Exemplo n.º 12
0
def _get_configs(service, cluster=None, node_group=None):
    """Look up the combined configuration for one service or process.

    A fixed set of service cross-references is always present. A truthy
    ``node_group`` contributes its converted ``node_configs`` followed by
    the defaults derived from its storage paths. A truthy ``cluster`` then
    contributes the HUE and HIVE additions and ``cluster.cluster_configs``.
    All merging goes through ``_merge_dicts``.

    :param service: key to look up in the combined mapping
    :param cluster: cluster object, or None to skip cluster-level settings
    :param node_group: node group object, or None to skip its settings
    :returns: the entry found under ``service``, or an empty dict
    """
    def _suffix_all(mounts, suffix):
        # Join every mount point, extended by ``suffix``, with commas.
        return ','.join(mount + suffix for mount in mounts)

    combined = {
        'OOZIE': {
            'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
        },
        'YARN': {
            'hdfs_service': cu.HDFS_SERVICE_NAME,
        },
        'HUE': {
            'hive_service': cu.HIVE_SERVICE_NAME,
            'oozie_service': cu.OOZIE_SERVICE_NAME,
        },
        'SPARK_ON_YARN': {
            'yarn_service': cu.YARN_SERVICE_NAME,
        },
        'HBASE': {
            'hdfs_service': cu.HDFS_SERVICE_NAME,
            'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME,
        },
    }

    if node_group:
        mounts = node_group.storage_paths()

        defaults_layer = {
            'NAMENODE': {
                'dfs_name_dir_list': _suffix_all(mounts, '/fs/nn'),
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': _suffix_all(mounts, '/fs/snn'),
            },
            'DATANODE': {
                'dfs_data_dir_list': _suffix_all(mounts, '/fs/dn'),
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs':
                    _suffix_all(mounts, '/yarn/local'),
            },
        }

        # User-supplied node group configs are merged before the defaults.
        user_layer = pu.convert_process_configs(node_group.node_configs)
        combined = _merge_dicts(combined, user_layer)
        combined = _merge_dicts(combined, defaults_layer)

    if cluster:
        hive_layer = {
            'HIVE': {
                'hive_metastore_database_type': 'postgresql',
                'hive_metastore_database_host':
                    pu.get_manager(cluster).internal_ip,
                'hive_metastore_database_port': '7432',
                'hive_metastore_database_password':
                    db_helper.get_hive_db_password(cluster),
                'mapreduce_yarn_service': cu.YARN_SERVICE_NAME,
            },
        }
        hue_layer = {
            'HUE': {
                'hue_webhdfs':
                    cu.get_role_name(pu.get_namenode(cluster), 'NAMENODE'),
            },
        }

        combined = _merge_dicts(combined, hue_layer)
        combined = _merge_dicts(combined, hive_layer)
        combined = _merge_dicts(combined, cluster.cluster_configs)

    return combined.get(service, {})
Exemplo n.º 13
0
    def test_get_role_name(self):
        """Role name for process 'eggs' on a host named 'spam-host'."""
        fake_instance = mock.Mock()
        fake_instance.hostname.return_value = 'spam-host'
        expected = 'eggs_spam_host'

        actual = cu.get_role_name(fake_instance, 'eggs')

        self.assertEqual(expected, actual)