def test_is_data_locality_enabled(self, get_config_value):
    self.CONF.enable_data_locality = False
    enabled = c_helper.is_data_locality_enabled(self.pctx, self.cluster)
    self.assertEqual(enabled, False)

    self.CONF.enable_data_locality = True
    target = c_helper.ENABLE_DATA_LOCALITY.applicable_target
    name = c_helper.ENABLE_DATA_LOCALITY.name
    c_helper.is_data_locality_enabled(self.pctx, self.cluster)
    get_config_value.assert_called_once_with(self.pctx, target, name,
                                             self.cluster)

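# A minimal sketch of the helper the test above exercises, reconstructed
# from its assertions; the names get_config_value and ENABLE_DATA_LOCALITY
# come from the test itself, but the exact body is an assumption, not the
# authoritative implementation.
def is_data_locality_enabled(pctx, cluster):
    # The service-wide CONF switch short-circuits the per-cluster lookup,
    # which is why the mocked get_config_value is called exactly once.
    if not CONF.enable_data_locality:
        return False
    return get_config_value(pctx, ENABLE_DATA_LOCALITY.applicable_target,
                            ENABLE_DATA_LOCALITY.name, cluster)
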
def _post_configuration(pctx, instance):
    node_group = instance.node_group
    dirs = _get_hadoop_dirs(node_group)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(pctx,
                                             instance.node_group.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(t_script, f.get_file_text(
                'plugins/vanilla/hadoop2/resources/topology.sh'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)

def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(pctx, instance.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(t_script, f.get_file_text(
                'plugins/vanilla/hadoop2/resources/topology.sh'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)

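# Hypothetical stand-in showing the dict shape that _post_configuration and
# _get_hadoop_configs expect from _get_hadoop_dirs; the real helper derives
# these paths from the instance's attached storage, so every path below is
# a placeholder.
def _get_hadoop_dirs_stub(instance):
    return {
        'hadoop_name_dirs': ['/mnt/hdfs/namenode'],    # dfs.namenode.name.dir
        'hadoop_data_dirs': ['/mnt/hdfs/datanode'],    # dfs.datanode.data.dir
        'hadoop_log_dir': '/mnt/log/hadoop',
        'hadoop_secure_dn_log_dir': '/mnt/log/hadoop-secure',
        'yarn_log_dir': '/mnt/log/yarn',
    }
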
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.info(_LI("Node group awareness is not implemented in YARN yet "
                     "so enable_hypervisor_awareness set to False "
                     "explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)

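# Illustration of the topology.data payload assembled above, assuming
# generate_topology_map returns a plain host -> rack mapping (the hostnames
# and rack paths here are made up; on Python 3.7+ dict order is insertion
# order, so the output below is deterministic):
tpl_map = {'worker-1': '/rack1', '10.0.0.5': '/rack2'}
topology_data = "\n".join(
    [k + " " + v for k, v in tpl_map.items()]) + "\n"
# topology_data == "worker-1 /rack1\n10.0.0.5 /rack2\n"
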
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        "Hadoop": {"fs.defaultFS": "hdfs://%s:9000" % nn_hostname},
        "HDFS": {
            "dfs.namenode.name.dir": ",".join(dirs["hadoop_name_dirs"]),
            "dfs.datanode.data.dir": ",".join(dirs["hadoop_data_dirs"]),
            "dfs.hosts": "%s/dn-include" % HADOOP_CONF_DIR,
            "dfs.hosts.exclude": "%s/dn-exclude" % HADOOP_CONF_DIR,
        },
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs["YARN"] = {
            "yarn.nodemanager.aux-services": "mapreduce_shuffle",
            "yarn.resourcemanager.hostname": "%s" % res_hostname,
            "yarn.resourcemanager.nodes.include-path":
                "%s/nm-include" % (HADOOP_CONF_DIR),
            "yarn.resourcemanager.nodes.exclude-path":
                "%s/nm-exclude" % (HADOOP_CONF_DIR),
        }
        confs["MapReduce"] = {"mapreduce.framework.name": "yarn"}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs["MapReduce"]["mapreduce.jobhistory.address"] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            "hadoop.proxyuser.hadoop.hosts": "*",
            "hadoop.proxyuser.hadoop.groups": "hadoop",
        }
        confs["Hadoop"].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())
        confs["JobFlow"] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config["name"]] = config["value"]
        confs["Hadoop"].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs["Hadoop"].update(th.TOPOLOGY_CONFIG)
        confs["Hadoop"].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            "hive.warehouse.subdir.inherit.perms": True,
            "javax.jdo.option.ConnectionURL":
                "jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true",
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                "javax.jdo.option.ConnectionURL":
                    "jdbc:mysql://%s/metastore" % hive_hostname,
                "javax.jdo.option.ConnectionDriverName":
                    "com.mysql.jdbc.Driver",
                "javax.jdo.option.ConnectionUserName": "******",
                "javax.jdo.option.ConnectionPassword": "******",
                "datanucleus.autoCreateSchema": "false",
                "datanucleus.fixedDatastore": "true",
                "hive.metastore.uris": "thrift://%s:9083" % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get("proxy_configs")
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            key = key_manager.API().get(
                context.current(), proxy_configs["proxy_password"])
            password = key.get_encoded()
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs["proxy_username"],
                swift.HADOOP_SWIFT_PASSWORD: password,
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs["proxy_trust_id"],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name,
            })

        confs["Hive"] = hive_cfg

    return confs

def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())
        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']
        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
                'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                    'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                    'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs

def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())
        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']
        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
                'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                    'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                    'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs

def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())
        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']
        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    return confs

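# A minimal, hypothetical sketch of how one section of the confs dict
# returned above could be serialized into a Hadoop *-site.xml body. Sahara
# ships its own XML serializer, so this illustrates the data flow only and
# is not the project's actual rendering code.
from xml.sax.saxutils import escape


def render_hadoop_xml(props):
    # Emit one <property> element per config entry, sorted by name for
    # deterministic output; values are str()-ed and XML-escaped.
    lines = ['<?xml version="1.0"?>', '<configuration>']
    for name, value in sorted(props.items()):
        lines.append('  <property>')
        lines.append('    <name>%s</name>' % escape(str(name)))
        lines.append('    <value>%s</value>' % escape(str(value)))
        lines.append('  </property>')
    lines.append('</configuration>')
    return '\n'.join(lines)

# Example: render_hadoop_xml(confs['HDFS']) yields one <property> element
# per HDFS setting, e.g. <name>dfs.hosts</name> with its include-file path.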