def _get_mapred_site_node_aware_props(self):
    result = topo.vm_awareness_mapred_config()
    result = {c['name']: c['value'] for c in result}
    # mapred.task.cache.levels causes failures here; carry the setting
    # under the equivalent newer property name instead
    result.pop('mapred.task.cache.levels')
    result['mapreduce.jobtracker.taskcache.levels'] = '3'
    return result

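# Worked example (standalone; the input list is stand-in data mirroring
# the unit test assertions later in this section, not the real topology
# helper's output) showing what _get_mapred_site_node_aware_props()
# produces: a flat name -> value dict with the task-cache key renamed.
raw = [
    {'name': 'mapred.jobtracker.nodegroup.aware', 'value': 'true'},
    {'name': 'mapred.task.cache.levels', 'value': '3'},
    {'name': 'mapred.jobtracker.jobSchedulable',
     'value': 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'},
]
result = {c['name']: c['value'] for c in raw}
result.pop('mapred.task.cache.levels')
result['mapreduce.jobtracker.taskcache.levels'] = '3'
print(result)
# {'mapred.jobtracker.nodegroup.aware': 'true',
#  'mapred.jobtracker.jobSchedulable':
#      'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup',
#  'mapreduce.jobtracker.taskcache.levels': '3'}
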
def finalize_configuration(self, cluster_spec):
    jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
    if jt_hosts:
        props = {'mapred-site': ['mapred.job.tracker',
                                 'mapred.job.tracker.http.address',
                                 'mapreduce.history.server.http.address']}
        self._replace_config_token(
            cluster_spec, '%JT_HOST%', jt_hosts.pop().fqdn(), props)

    # data locality/rack awareness prop processing
    mapred_site_config = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for prop in th.vm_awareness_mapred_config():
            mapred_site_config[prop['name']] = prop['value']

    # process storage paths to accommodate ephemeral or cinder storage
    # NOTE: mapred.system.dir is an HDFS namespace path (not a filesystem
    # path) so the default path should suffice
    tt_node_groups = cluster_spec.get_node_groups_containing_component(
        'TASKTRACKER')
    if tt_node_groups:
        global_config = cluster_spec.configurations['global']
        common_paths = self._get_common_paths(tt_node_groups)
        mapred_site_config['mapred.local.dir'] = (
            self._generate_storage_path(common_paths, '/hadoop/mapred'))
        global_config['mapred_local_dir'] = self._generate_storage_path(
            common_paths, '/hadoop/mapred')

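# The finalize_configuration methods above and below all call a
# _replace_config_token helper. The method sketch below is an assumption
# inferred purely from the call sites (cluster_spec, a token such as
# '%JT_HOST%', a resolved value, and a {config-file: [property, ...]}
# map); it is not the plugin's actual implementation.
def _replace_config_token(self, cluster_spec, token, value, props):
    for config_name, prop_names in props.items():
        config = cluster_spec.configurations[config_name]
        for prop in prop_names:
            if prop in config:
                # substitute e.g. '%JT_HOST%' with the resolved FQDN
                config[prop] = config[prop].replace(token, value)
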
def finalize_configuration(self, cluster_spec):
    rm_hosts = cluster_spec.determine_component_hosts('RESOURCEMANAGER')
    if rm_hosts:
        props = {'yarn-site': ['yarn.resourcemanager.'
                               'resource-tracker.address',
                               'yarn.resourcemanager.hostname',
                               'yarn.resourcemanager.address',
                               'yarn.resourcemanager.scheduler.address',
                               'yarn.resourcemanager.webapp.address',
                               'yarn.log.server.url',
                               'yarn.resourcemanager.admin.address']}
        self._replace_config_token(
            cluster_spec, '%RM_HOST%', rm_hosts.pop().fqdn(), props)

    # data locality/rack awareness prop processing
    mapred_site_config = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for prop in th.vm_awareness_mapred_config():
            mapred_site_config[prop['name']] = prop['value']

    # process storage paths to accommodate ephemeral or cinder storage
    # NOTE: yarn.nodemanager.local-dirs belongs in yarn-site, not
    # mapred-site
    yarn_site_config = cluster_spec.configurations['yarn-site']
    nm_node_groups = cluster_spec.get_node_groups_containing_component(
        'NODEMANAGER')
    if nm_node_groups:
        common_paths = self._get_common_paths(nm_node_groups)
        yarn_site_config['yarn.nodemanager.local-dirs'] = (
            self._generate_storage_path(common_paths,
                                        '/hadoop/yarn/local'))

def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)
        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        cfg = all_cfg
        cfg_filter = HIVE_DEFAULT

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if CONF.use_identity_api_v3 and proxy_configs:
            cfg, cfg_filter = _inject_swift_trust_info(cfg,
                                                       cfg_filter,
                                                       proxy_configs)

        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, cfg_filter)})
        LOG.debug('Generated hive-site.xml for hive {host}'.format(
            host=hive_hostname))

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg,
                                               o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie {host}'.format(
            host=oozie_hostname))

    return xml_configs

def finalize_configuration(self, cluster_spec):
    hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
    if hs_hosts:
        props = {'mapred-site': ['mapreduce.jobhistory.webapp.address',
                                 'mapreduce.jobhistory.address']}
        self._replace_config_token(
            cluster_spec, '%HS_HOST%', hs_hosts.pop().fqdn(), props)

    # data locality/rack awareness prop processing
    mapred_site_config = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for prop in th.vm_awareness_mapred_config():
            mapred_site_config[prop['name']] = prop['value']

def test_map_red_config(self):
    result = th.vm_awareness_mapred_config()
    self.assertEqual(3, len(result))
    for item in result:
        del item['description']
    self.assertIn({'name': "mapred.jobtracker.nodegroup.aware",
                   'value': 'true'},
                  result)
    self.assertIn({'name': "mapred.task.cache.levels",
                   'value': '3'},
                  result)
    className = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
    self.assertIn({'name': "mapred.jobtracker.jobSchedulable",
                   'value': className},
                  result)

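# A hypothetical stand-in for th.vm_awareness_mapred_config(), written
# only to make explicit the shape the test above expects: a list of
# dicts with 'name', 'value', and 'description' keys. The description
# strings are invented placeholders; the real helper lives in Sahara's
# topology module and may differ.
def vm_awareness_mapred_config():
    return [
        {'name': 'mapred.jobtracker.nodegroup.aware',
         'value': 'true',
         'description': 'hypothetical description'},
        {'name': 'mapred.task.cache.levels',
         'value': '3',
         'description': 'hypothetical description'},
        {'name': 'mapred.jobtracker.jobSchedulable',
         'value': 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup',
         'description': 'hypothetical description'},
    ]
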
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)
        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg,
                                               o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs

def finalize_configuration(self, cluster_spec):
    jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
    if jt_hosts:
        props = {
            'mapred-site': [
                'mapred.job.tracker',
                'mapred.job.tracker.http.address',
                'mapreduce.history.server.http.address'
            ]
        }
        self._replace_config_token(cluster_spec, '%JT_HOST%',
                                   jt_hosts.pop().fqdn(), props)

    # HISTORYSERVER component is now a part of MapReduce 1 in Ambari 1.6.0
    hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
    if hs_hosts:
        props = {'mapred-site': ['mapreduce.jobhistory.webapp.address']}
        self._replace_config_token(cluster_spec, '%HS_HOST%',
                                   hs_hosts.pop().fqdn(), props)

    # data locality/rack awareness prop processing
    mapred_site_config = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for prop in th.vm_awareness_mapred_config():
            mapred_site_config[prop['name']] = prop['value']

    # process storage paths to accommodate ephemeral or cinder storage
    # NOTE: mapred.system.dir is an HDFS namespace path (not a filesystem
    # path) so the default path should suffice
    tt_node_groups = cluster_spec.get_node_groups_containing_component(
        'TASKTRACKER')
    if tt_node_groups:
        global_config = cluster_spec.configurations['global']
        common_paths = self._get_common_paths(tt_node_groups)
        mapred_site_config['mapred.local.dir'] = (
            self._generate_storage_path(common_paths, '/hadoop/mapred'))
        global_config['mapred_local_dir'] = self._generate_storage_path(
            common_paths, '/hadoop/mapred')

def _get_mapred_site_props(self, context):
    result = {}
    if context.is_node_aware:
        for conf in topo.vm_awareness_mapred_config():
            result[conf['name']] = conf['value']
    return result

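# Hedged usage sketch: with a node-aware context, _get_mapred_site_props
# reduces to flattening the topology property list into a name -> value
# mapping. _FakeContext is a hypothetical stand-in for the real context
# object, and the example values are taken from the unit test earlier in
# this section, assuming topo refers to the same topology helper.
class _FakeContext(object):
    is_node_aware = True  # hypothetical minimal context attribute

# Equivalent inline form of calling _get_mapred_site_props(_FakeContext()):
props = {conf['name']: conf['value']
         for conf in topo.vm_awareness_mapred_config()}
# e.g. {'mapred.jobtracker.nodegroup.aware': 'true',
#       'mapred.task.cache.levels': '3',
#       'mapred.jobtracker.jobSchedulable':
#           'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'}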