Example #1
    def finalize_configuration(self, cluster_spec):
        rm_hosts = cluster_spec.determine_component_hosts('RESOURCEMANAGER')
        if rm_hosts:
            props = {
                'yarn-site': [
                    'yarn.resourcemanager.resource-tracker.address',
                    'yarn.resourcemanager.hostname',
                    'yarn.resourcemanager.address',
                    'yarn.resourcemanager.scheduler.address',
                    'yarn.resourcemanager.webapp.address',
                    'yarn.log.server.url',
                    'yarn.resourcemanager.admin.address'
                ]
            }

            self._replace_config_token(cluster_spec, '%RM_HOST%',
                                       rm_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']

        # process storage paths to accommodate ephemeral or cinder storage
        nm_node_groups = cluster_spec.get_node_groups_containing_component(
            'NODEMANAGER')
        if nm_node_groups:
            common_paths = self._get_common_paths(nm_node_groups)
            mapred_site_config['yarn.nodemanager.local-dirs'] = (
                self._generate_storage_path(common_paths,
                                            '/hadoop/yarn/local'))
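
The helper `_replace_config_token` is not shown in these examples. As an illustration only (not the HDP plugin's actual implementation), a minimal sketch consistent with how it is called above could look like the following, assuming `cluster_spec.configurations` maps a config file name such as 'yarn-site' to a plain dict of property names and string values:

    def _replace_config_token(self, cluster_spec, token, value, props):
        # props maps a config file name (e.g. 'yarn-site') to the list of
        # property names whose values may contain the token (e.g. '%RM_HOST%')
        for config_name, prop_names in props.items():
            config = cluster_spec.configurations[config_name]
            for prop_name in prop_names:
                if prop_name in config:
                    # substitute the placeholder with the resolved host FQDN
                    config[prop_name] = config[prop_name].replace(token, value)
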
Example #2
    def finalize_configuration(self, cluster_spec):
        jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
        if jt_hosts:
            props = {'mapred-site': ['mapred.job.tracker',
                                     'mapred.job.tracker.http.address',
                                     'mapreduce.history.server.http.address']}

            self._replace_config_token(
                cluster_spec, '%JT_HOST%', jt_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']

        # process storage paths to accommodate ephemeral or cinder storage
        # NOTE:  mapred.system.dir is an HDFS namespace path (not a filesystem
        # path) so the default path should suffice
        tt_node_groups = cluster_spec.get_node_groups_containing_component(
            'TASKTRACKER')
        if tt_node_groups:
            global_config = cluster_spec.configurations['global']
            common_paths = self._get_common_paths(tt_node_groups)
            mapred_site_config['mapred.local.dir'] = (
                self._generate_storage_path(common_paths, '/hadoop/mapred'))
            global_config['mapred_local_dir'] = self._generate_storage_path(
                common_paths, '/hadoop/mapred')
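
`_get_common_paths` and `_generate_storage_path` are likewise not shown. Given that the result is assigned to Hadoop properties such as mapred.local.dir, which accept a comma-separated list of directories, a plausible sketch of the latter (an assumption, not the plugin's real code) is:

    def _generate_storage_path(self, storage_paths, path):
        # append the suffix to every storage mount shared by the node group
        # and join them the way Hadoop expects: comma separated
        return ','.join(p + path for p in storage_paths)
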
Example #3
    def finalize_configuration(self, cluster_spec):
        jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
        if jt_hosts:
            props = {'mapred-site': ['mapred.job.tracker',
                                     'mapred.job.tracker.http.address',
                                     'mapreduce.history.server.http.address']}

            self._replace_config_token(
                cluster_spec, '%JT_HOST%', jt_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']

        # process storage paths to accommodate ephemeral or cinder storage
        # NOTE:  mapred.system.dir is an HDFS namespace path (not a filesystem
        # path) so the default path should suffice
        tt_node_groups = cluster_spec.get_node_groups_containing_component(
            'TASKTRACKER')
        if tt_node_groups:
            global_config = cluster_spec.configurations['global']
            common_paths = self._get_common_paths(tt_node_groups)
            mapred_site_config['mapred.local.dir'] = (
                self._generate_storage_path(common_paths, '/hadoop/mapred'))
            global_config['mapred_local_dir'] = self._generate_storage_path(
                common_paths, '/hadoop/mapred')
Example #4
    def test_map_red_config(self):
        result = th.vm_awareness_mapred_config()
        self.assertEqual(3, len(result))
        for item in result:
            del item['description']

        self.assertIn({'name': "mapred.jobtracker.nodegroup.aware",
                       'value': 'true'},
                      result)

        self.assertIn({'name': "mapred.task.cache.levels",
                       'value': '3'},
                      result)
        className = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
        self.assertIn({'name': "mapred.jobtracker.jobSchedulable",
                       'value': className},
                      result)
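
The assertions above pin down the shape of `th.vm_awareness_mapred_config()`: a list of three dicts, each with 'name', 'value' and 'description' keys. A minimal sketch consistent with the test is shown below; the description strings are placeholders for illustration, not the real ones:

def vm_awareness_mapred_config():
    # values match the test expectations; descriptions are placeholders
    return [
        {'name': 'mapred.jobtracker.nodegroup.aware',
         'value': 'true',
         'description': 'Make the JobTracker node-group aware.'},
        {'name': 'mapred.task.cache.levels',
         'value': '3',
         'description': 'Task cache levels: host, node group, rack.'},
        {'name': 'mapred.jobtracker.jobSchedulable',
         'value': 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup',
         'description': 'JobSchedulable implementation for node groups.'},
    ]
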
Example #5
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #6
    def test_map_red_config(self):
        result = th.vm_awareness_mapred_config()
        self.assertEqual(3, len(result))
        for item in result:
            del item['description']

        self.assertIn({'name': "mapred.jobtracker.nodegroup.aware",
                       'value': 'true'},
                      result)

        self.assertIn({'name': "mapred.task.cache.levels",
                       'value': '3'},
                      result)
        className = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
        self.assertIn({'name': "mapred.jobtracker.jobSchedulable",
                       'value': className},
                      result)
Example #7
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #8
    def finalize_configuration(self, cluster_spec):
        hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
        if hs_hosts:
            props = {
                'mapred-site': [
                    'mapreduce.jobhistory.webapp.address',
                    'mapreduce.jobhistory.address'
                ]
            }

            self._replace_config_token(cluster_spec, '%HS_HOST%',
                                       hs_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']
Example #9
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname, passwd_hive_mysql):
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name':
        'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts':
        '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude':
        '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker':
            '%s:8021' % jt_hostname,
            'mapred.system.dir':
            extract_hadoop_path(storage_path, '/mapred/mapredsystem'),
            'mapred.local.dir':
            extract_hadoop_path(storage_path, '/lib/hadoop/mapred'),
            'mapred.hosts':
            '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude':
            '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms':
            True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
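
Each example hands the accumulated config dict to `x.create_hadoop_xml(cfg, defaults)`. The sketch below shows one plausible shape for that step, assuming the second argument is a list of {'name': ..., 'value': ...} property defaults and the output is standard Hadoop <configuration> XML; it is illustrative only, not the actual helper:

from xml.sax.saxutils import escape


def create_hadoop_xml(configs, defaults):
    # keep only the properties that belong in this file, preferring the
    # cluster-supplied value over the shipped default
    lines = ['<?xml version="1.0"?>', '<configuration>']
    for prop in defaults:
        name = prop['name']
        value = configs.get(name, prop.get('value'))
        if value is None:
            continue
        lines.append('  <property>')
        lines.append('    <name>%s</name>' % escape(name))
        lines.append('    <value>%s</value>' % escape(str(value)))
        lines.append('  </property>')
    lines.append('</configuration>')
    return '\n'.join(lines)
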
Example #10
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname, passwd_hive_mysql):
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs